diff options
author | Anton Vorontsov <avorontsov@nvidia.com> | 2015-08-19 17:27:51 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-03-23 10:48:47 -0400 |
commit | 1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch) | |
tree | 8b93fcd00739f9ada9302f06175278c9cb1d6785 /drivers/gpu/nvgpu/gk20a/gk20a.h | |
parent | 82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff) |
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908
This commit adds support for FECS ctxsw tracing. Code is compiled
conditionally under CONFIG_GK20_CTXSW_TRACE.
This feature requires an updated FECS ucode that writes one record to a ring
buffer on each context switch. On RM/Kernel side, the GPU driver reads records
from the master ring buffer and generates trace entries into a user-facing
VM ring buffer. For each record in the master ring buffer, RM/Kernel has
to retrieve the vmid+pid of the user process that submitted the related work.
Features currently implemented:
- master ring buffer allocation
- debugfs to dump master ring buffer
- FECS record per context switch (with both current and new contexts)
- dedicated device for ctxsw tracing (access to VM ring buffer)
- SOF generation (and access to PTIMER)
- VM ring buffer allocation, and reconfiguration
- enable/disable tracing at user level
- event-based trace filtering
- context_ptr to vmid+pid mapping
- read system call for ctxsw dev
- mmap system call for ctxsw dev (direct access to VM ring buffer)
- poll system call for ctxsw dev
- save/restore register on ELPG/CG6
- separate user ring from FECS ring handling
Features requiring ucode changes:
- enable/disable tracing at FECS level
- actual busy time on engine (bug 1642354)
- master ring buffer threshold interrupt (P1)
- API for GPU to CPU timestamp conversion (P1)
- vmid/pid/uid based filtering (P1)
Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1022737
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8b87c7aa..541e7b50 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -25,6 +25,8 @@ struct channel_gk20a; | |||
25 | struct gr_gk20a; | 25 | struct gr_gk20a; |
26 | struct sim_gk20a; | 26 | struct sim_gk20a; |
27 | struct gk20a_ctxsw_ucode_segments; | 27 | struct gk20a_ctxsw_ucode_segments; |
28 | struct gk20a_fecs_trace; | ||
29 | struct gk20a_ctxsw_trace; | ||
28 | struct acr_gm20b; | 30 | struct acr_gm20b; |
29 | 31 | ||
30 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
@@ -373,6 +375,19 @@ struct gpu_ops { | |||
373 | bool use_dma_for_fw_bootstrap; | 375 | bool use_dma_for_fw_bootstrap; |
374 | } gr_ctx; | 376 | } gr_ctx; |
375 | struct { | 377 | struct { |
378 | int (*init)(struct gk20a *g); | ||
379 | int (*max_entries)(struct gk20a *, | ||
380 | struct nvgpu_ctxsw_trace_filter *); | ||
381 | int (*flush)(struct gk20a *g); | ||
382 | int (*poll)(struct gk20a *g); | ||
383 | int (*enable)(struct gk20a *g); | ||
384 | int (*disable)(struct gk20a *g); | ||
385 | int (*reset)(struct gk20a *g); | ||
386 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); | ||
387 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); | ||
388 | int (*deinit)(struct gk20a *g); | ||
389 | } fecs_trace; | ||
390 | struct { | ||
376 | bool (*support_sparse)(struct gk20a *g); | 391 | bool (*support_sparse)(struct gk20a *g); |
377 | bool (*is_debug_mode_enabled)(struct gk20a *g); | 392 | bool (*is_debug_mode_enabled)(struct gk20a *g); |
378 | void (*set_debug_mode)(struct gk20a *g, bool enable); | 393 | void (*set_debug_mode)(struct gk20a *g, bool enable); |
@@ -613,6 +628,11 @@ struct gk20a { | |||
613 | struct device *node; | 628 | struct device *node; |
614 | } tsg; | 629 | } tsg; |
615 | 630 | ||
631 | struct { | ||
632 | struct cdev cdev; | ||
633 | struct device *node; | ||
634 | } ctxsw; | ||
635 | |||
616 | struct mutex client_lock; | 636 | struct mutex client_lock; |
617 | int client_refcount; /* open channels and ctrl nodes */ | 637 | int client_refcount; /* open channels and ctrl nodes */ |
618 | 638 | ||
@@ -639,6 +659,9 @@ struct gk20a { | |||
639 | 659 | ||
640 | struct gk20a_scale_profile *scale_profile; | 660 | struct gk20a_scale_profile *scale_profile; |
641 | 661 | ||
662 | struct gk20a_ctxsw_trace *ctxsw_trace; | ||
663 | struct gk20a_fecs_trace *fecs_trace; | ||
664 | |||
642 | struct device_dma_parameters dma_parms; | 665 | struct device_dma_parameters dma_parms; |
643 | 666 | ||
644 | struct gk20a_cde_app cde_app; | 667 | struct gk20a_cde_app cde_app; |
@@ -716,6 +739,7 @@ enum gk20a_dbg_categories { | |||
716 | gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ | 739 | gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ |
717 | gpu_dbg_cde = BIT(10), /* cde info messages */ | 740 | gpu_dbg_cde = BIT(10), /* cde info messages */ |
718 | gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ | 741 | gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ |
742 | gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */ | ||
719 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ | 743 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ |
720 | }; | 744 | }; |
721 | 745 | ||
@@ -962,4 +986,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x) | |||
962 | else | 986 | else |
963 | return (timeout * 10) / scale10x; | 987 | return (timeout * 10) / scale10x; |
964 | } | 988 | } |
989 | |||
990 | u64 gk20a_read_ptimer(struct gk20a *g); | ||
965 | #endif /* GK20A_H */ | 991 | #endif /* GK20A_H */ |