summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gk20a.h
diff options
context:
space:
mode:
authorAnton Vorontsov <avorontsov@nvidia.com>2015-08-19 17:27:51 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-03-23 10:48:47 -0400
commit1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch)
tree8b93fcd00739f9ada9302f06175278c9cb1d6785 /drivers/gpu/nvgpu/gk20a/gk20a.h
parent82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff)
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908 This commit adds support for FECS ctxsw tracing. Code is compiled conditionally under CONFIG_GK20A_CTXSW_TRACE. This feature requires an updated FECS ucode that writes one record to a ring buffer on each context switch. On RM/Kernel side, the GPU driver reads records from the master ring buffer and generates trace entries into a user-facing VM ring buffer. For each record in the master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user process that submitted related work. Features currently implemented: - master ring buffer allocation - debugfs to dump master ring buffer - FECS record per context switch (with both current and new contexts) - dedicated device for ctxsw tracing (access to VM ring buffer) - SOF generation (and access to PTIMER) - VM ring buffer allocation, and reconfiguration - enable/disable tracing at user level - event-based trace filtering - context_ptr to vmid+pid mapping - read system call for ctxsw dev - mmap system call for ctxsw dev (direct access to VM ring buffer) - poll system call for ctxsw dev - save/restore register on ELPG/CG6 - separate user ring from FECS ring handling Features requiring ucode changes: - enable/disable tracing at FECS level - actual busy time on engine (bug 1642354) - master ring buffer threshold interrupt (P1) - API for GPU to CPU timestamp conversion (P1) - vmid/pid/uid based filtering (P1) Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1022737 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.h')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h26
1 files changed, 26 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8b87c7aa..541e7b50 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -25,6 +25,8 @@ struct channel_gk20a;
25struct gr_gk20a; 25struct gr_gk20a;
26struct sim_gk20a; 26struct sim_gk20a;
27struct gk20a_ctxsw_ucode_segments; 27struct gk20a_ctxsw_ucode_segments;
28struct gk20a_fecs_trace;
29struct gk20a_ctxsw_trace;
28struct acr_gm20b; 30struct acr_gm20b;
29 31
30#include <linux/sched.h> 32#include <linux/sched.h>
@@ -373,6 +375,19 @@ struct gpu_ops {
373 bool use_dma_for_fw_bootstrap; 375 bool use_dma_for_fw_bootstrap;
374 } gr_ctx; 376 } gr_ctx;
375 struct { 377 struct {
378 int (*init)(struct gk20a *g);
379 int (*max_entries)(struct gk20a *,
380 struct nvgpu_ctxsw_trace_filter *);
381 int (*flush)(struct gk20a *g);
382 int (*poll)(struct gk20a *g);
383 int (*enable)(struct gk20a *g);
384 int (*disable)(struct gk20a *g);
385 int (*reset)(struct gk20a *g);
386 int (*bind_channel)(struct gk20a *, struct channel_gk20a *);
387 int (*unbind_channel)(struct gk20a *, struct channel_gk20a *);
388 int (*deinit)(struct gk20a *g);
389 } fecs_trace;
390 struct {
376 bool (*support_sparse)(struct gk20a *g); 391 bool (*support_sparse)(struct gk20a *g);
377 bool (*is_debug_mode_enabled)(struct gk20a *g); 392 bool (*is_debug_mode_enabled)(struct gk20a *g);
378 void (*set_debug_mode)(struct gk20a *g, bool enable); 393 void (*set_debug_mode)(struct gk20a *g, bool enable);
@@ -613,6 +628,11 @@ struct gk20a {
613 struct device *node; 628 struct device *node;
614 } tsg; 629 } tsg;
615 630
631 struct {
632 struct cdev cdev;
633 struct device *node;
634 } ctxsw;
635
616 struct mutex client_lock; 636 struct mutex client_lock;
617 int client_refcount; /* open channels and ctrl nodes */ 637 int client_refcount; /* open channels and ctrl nodes */
618 638
@@ -639,6 +659,9 @@ struct gk20a {
639 659
640 struct gk20a_scale_profile *scale_profile; 660 struct gk20a_scale_profile *scale_profile;
641 661
662 struct gk20a_ctxsw_trace *ctxsw_trace;
663 struct gk20a_fecs_trace *fecs_trace;
664
642 struct device_dma_parameters dma_parms; 665 struct device_dma_parameters dma_parms;
643 666
644 struct gk20a_cde_app cde_app; 667 struct gk20a_cde_app cde_app;
@@ -716,6 +739,7 @@ enum gk20a_dbg_categories {
716 gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ 739 gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
717 gpu_dbg_cde = BIT(10), /* cde info messages */ 740 gpu_dbg_cde = BIT(10), /* cde info messages */
718 gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ 741 gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
742 gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */
719 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ 743 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
720}; 744};
721 745
@@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
962 else 986 else
963 return (timeout * 10) / scale10x; 987 return (timeout * 10) / scale10x;
964} 988}
989
990u64 gk20a_read_ptimer(struct gk20a *g);
965#endif /* GK20A_H */ 991#endif /* GK20A_H */