summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorAnton Vorontsov <avorontsov@nvidia.com>2015-08-19 17:27:51 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-03-23 10:48:47 -0400
commit1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch)
tree8b93fcd00739f9ada9302f06175278c9cb1d6785 /include
parent82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff)
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908 This commit adds support for FECS ctxsw tracing. Code is compiled conditionally under CONFIG_GK20_CTXSW_TRACE. This feature requires an updated FECS ucode that writes one record to a ring buffer on each context switch. On the RM/Kernel side, the GPU driver reads records from the master ring buffer and generates trace entries into a user-facing VM ring buffer. For each record in the master ring buffer, RM/Kernel has to retrieve the vmid+pid of the user process that submitted the related work. Features currently implemented: - master ring buffer allocation - debugfs to dump master ring buffer - FECS record per context switch (with both current and new contexts) - dedicated device for ctxsw tracing (access to VM ring buffer) - SOF generation (and access to PTIMER) - VM ring buffer allocation, and reconfiguration - enable/disable tracing at user level - event-based trace filtering - context_ptr to vmid+pid mapping - read system call for ctxsw dev - mmap system call for ctxsw dev (direct access to VM ring buffer) - poll system call for ctxsw dev - save/restore register on ELPG/CG6 - separate user ring from FECS ring handling Features requiring ucode changes: - enable/disable tracing at FECS level - actual busy time on engine (bug 1642354) - master ring buffer threshold interrupt (P1) - API for GPU to CPU timestamp conversion (P1) - vmid/pid/uid based filtering (P1) Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1022737 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'include')
-rw-r--r--include/trace/events/gk20a.h6
-rw-r--r--include/uapi/linux/nvgpu.h90
2 files changed, 93 insertions, 3 deletions
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 461ff6e8..23b5b642 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -387,7 +387,7 @@ TRACE_EVENT(gk20a_as_ioctl_get_va_regions,
387TRACE_EVENT(gk20a_mmu_fault, 387TRACE_EVENT(gk20a_mmu_fault,
388 TP_PROTO(u32 fault_hi, u32 fault_lo, 388 TP_PROTO(u32 fault_hi, u32 fault_lo,
389 u32 fault_info, 389 u32 fault_info,
390 u32 instance, 390 u64 instance,
391 u32 engine_id, 391 u32 engine_id,
392 const char *engine, 392 const char *engine,
393 const char *client, 393 const char *client,
@@ -398,7 +398,7 @@ TRACE_EVENT(gk20a_mmu_fault,
398 __field(u32, fault_hi) 398 __field(u32, fault_hi)
399 __field(u32, fault_lo) 399 __field(u32, fault_lo)
400 __field(u32, fault_info) 400 __field(u32, fault_info)
401 __field(u32, instance) 401 __field(u64, instance)
402 __field(u32, engine_id) 402 __field(u32, engine_id)
403 __field(const char *, engine) 403 __field(const char *, engine)
404 __field(const char *, client) 404 __field(const char *, client)
@@ -414,7 +414,7 @@ TRACE_EVENT(gk20a_mmu_fault,
414 __entry->client = client; 414 __entry->client = client;
415 __entry->fault_type = fault_type; 415 __entry->fault_type = fault_type;
416 ), 416 ),
417 TP_printk("fault=0x%x,%08x info=0x%x instance=0x%x engine_id=%d engine=%s client=%s type=%s", 417 TP_printk("fault=0x%x,%08x info=0x%x instance=0x%llx engine_id=%d engine=%s client=%s type=%s",
418 __entry->fault_hi, __entry->fault_lo, 418 __entry->fault_hi, __entry->fault_lo,
419 __entry->fault_info, __entry->instance, __entry->engine_id, 419 __entry->fault_info, __entry->instance, __entry->engine_id,
420 __entry->engine, __entry->client, __entry->fault_type) 420 __entry->engine, __entry->client, __entry->fault_type)
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0c8de87f..64ac45b5 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1215,4 +1215,94 @@ struct nvgpu_as_map_buffer_batch_args {
1215#define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ 1215#define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
1216 sizeof(struct nvgpu_as_map_buffer_ex_args) 1216 sizeof(struct nvgpu_as_map_buffer_ex_args)
1217 1217
1218
1219/*
1220 * /dev/nvhost-ctxsw-gpu device
1221 *
1222 * Opening a '/dev/nvhost-ctxsw-gpu' device node creates a way to trace
1223 * context switches on GR engine
1224 */
1225
/* ioctl type character: first argument of the _IO* macros that build the NVGPU_CTXSW_IOCTL_* request codes. */
1226#define NVGPU_CTXSW_IOCTL_MAGIC 'C'
1227
/*
 * Trace tag values. Presumably these are the values carried in
 * nvgpu_ctxsw_trace_entry.tag -- confirm against the driver.
 * The numbering is sparse (0x00-0x05, 0x0a-0x0d, 0xff) and the list below
 * is not in numeric order.
 */
1228#define NVGPU_CTXSW_TAG_SOF 0x00
1229#define NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01
1230#define NVGPU_CTXSW_TAG_FE_ACK 0x02
1231#define NVGPU_CTXSW_TAG_FE_ACK_WFI 0x0a
1232#define NVGPU_CTXSW_TAG_FE_ACK_GFXP 0x0b
1233#define NVGPU_CTXSW_TAG_FE_ACK_CTAP 0x0c
1234#define NVGPU_CTXSW_TAG_FE_ACK_CILP 0x0d
1235#define NVGPU_CTXSW_TAG_SAVE_END 0x03
1236#define NVGPU_CTXSW_TAG_RESTORE_START 0x04
1237#define NVGPU_CTXSW_TAG_CONTEXT_START 0x05
1238#define NVGPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff
/* Highest tag value (0xff); NVGPU_CTXSW_FILTER_SIZE is defined as this value plus one. */
1239#define NVGPU_CTXSW_TAG_LAST \
1240 NVGPU_CTXSW_TAG_INVALID_TIMESTAMP
1241
/*
 * One context-switch trace record.
 * The fields sum to 1 + 1 + 2 + 4 + 8 + 8 = 24 bytes and are ordered so
 * that each one falls on its natural alignment without padding.
 */
1242struct nvgpu_ctxsw_trace_entry {
1243 __u8 tag; /* presumably one of NVGPU_CTXSW_TAG_* -- confirm against the driver */
1244 __u8 vmid;
1245 __u16 seqno; /* sequence number to detect drops */
1246 __u32 context_id; /* context_id as allocated by FECS */
1247 __u64 pid; /* 64-bit is max bits of different OS pid */
1248 __u64 timestamp; /* 64-bit time */
1249};
1250
/* Presumably the expected values of nvgpu_ctxsw_ring_header.magic and .version -- confirm against the driver. */
1251#define NVGPU_CTXSW_RING_HEADER_MAGIC 0x7000fade
1252#define NVGPU_CTXSW_RING_HEADER_VERSION 0
1253
/*
 * Ring buffer header: eight 32-bit fields, 32 bytes in total.
 * NOTE(review): the ring_setup size comment says the ring size includes
 * a header, so this structure presumably sits at the start of the ring
 * buffer -- confirm against the driver.
 */
1254struct nvgpu_ctxsw_ring_header {
1255 __u32 magic;
1256 __u32 version;
1257 __u32 num_ents; /* presumably the number of entries in the ring -- confirm */
1258 __u32 ent_size; /* presumably the size of one entry in bytes -- confirm */
/* The four fields below are volatile-qualified; presumably they change while the buffer is shared between writer and reader -- confirm. */
1259 volatile __u32 drop_count; /* excluding filtered out events */
1260 volatile __u32 write_seqno;
1261 volatile __u32 write_idx;
1262 volatile __u32 read_idx;
1263};
1264
/* Argument of NVGPU_CTXSW_IOCTL_RING_SETUP (declared with _IOWR, so it is both read and written). */
1265struct nvgpu_ctxsw_ring_setup_args {
1266 __u32 size; /* [in/out] size of the ring buffer in bytes (including
1267 the header). It will be rounded to page size; on return
1268 this field holds the actual allocated size. */
1269};
1270
/*
 * Tag filter bitmap helpers.
 *
 * (p) points to an object with an array member named tag_bits made of
 * 64-bit words (struct nvgpu_ctxsw_trace_filter); (n) is a tag number in
 * [0, NVGPU_CTXSW_FILTER_SIZE). Tag n is bit (n % 64) of word (n / 64).
 * (n) is expanded twice in SET/CLR/ISSET, so it must not have side effects.
 *
 * The mask is built from 1ULL rather than plain 1: the shift count can be
 * as large as 63, and shifting an int by 31 or more is undefined (or, for
 * 31, yields a negative value that sign-extends over the upper 32 bits of
 * the 64-bit word).
 *
 * NVGPU_CTXSW_FILTER_ISSET evaluates to 0 or 1, so the result stays
 * correct when stored in an int even for bits 32..63.
 */
#define NVGPU_CTXSW_FILTER_SIZE	(NVGPU_CTXSW_TAG_LAST + 1)
#define NVGPU_CTXSW_FILTER_SET(n, p) \
	((p)->tag_bits[(n) / 64] |= (1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_CLR(n, p) \
	((p)->tag_bits[(n) / 64] &= ~(1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_ISSET(n, p) \
	(((p)->tag_bits[(n) / 64] & (1ULL << ((n) & 63))) != 0)
/* Clear or set every bit of the whole filter object, including bits beyond NVGPU_CTXSW_FILTER_SIZE if any. */
#define NVGPU_CTXSW_FILTER_CLR_ALL(p) memset((void *)(p), 0, sizeof(*(p)))
#define NVGPU_CTXSW_FILTER_SET_ALL(p) memset((void *)(p), ~0, sizeof(*(p)))
1280
/*
 * Tag filter bitmap: NVGPU_CTXSW_FILTER_SIZE bits packed into 64-bit
 * words, rounded up to a whole number of words. With
 * NVGPU_CTXSW_TAG_LAST == 0xff this is 256 bits, i.e. four words.
 * It is manipulated through the NVGPU_CTXSW_FILTER_* macros.
 */
1281struct nvgpu_ctxsw_trace_filter {
1282 __u64 tag_bits[(NVGPU_CTXSW_FILTER_SIZE + 63) / 64];
1283};
1284
/* Argument type named in the NVGPU_CTXSW_IOCTL_SET_FILTER and NVGPU_CTXSW_IOCTL_GET_FILTER definitions. */
1285struct nvgpu_ctxsw_trace_filter_args {
1286 struct nvgpu_ctxsw_trace_filter filter;
1287};
1288
/*
 * ioctl request codes for the ctxsw device, command numbers 1..6.
 * Codes built with _IO carry no argument structure; _IOW, _IOR and _IOWR
 * encode the argument type and transfer direction.
 */
1289#define NVGPU_CTXSW_IOCTL_TRACE_ENABLE \
1290 _IO(NVGPU_CTXSW_IOCTL_MAGIC, 1)
1291#define NVGPU_CTXSW_IOCTL_TRACE_DISABLE \
1292 _IO(NVGPU_CTXSW_IOCTL_MAGIC, 2)
/* _IOWR: the argument is read and written back (its size field is marked [in/out]). */
1293#define NVGPU_CTXSW_IOCTL_RING_SETUP \
1294 _IOWR(NVGPU_CTXSW_IOCTL_MAGIC, 3, struct nvgpu_ctxsw_ring_setup_args)
1295#define NVGPU_CTXSW_IOCTL_SET_FILTER \
1296 _IOW(NVGPU_CTXSW_IOCTL_MAGIC, 4, struct nvgpu_ctxsw_trace_filter_args)
1297#define NVGPU_CTXSW_IOCTL_GET_FILTER \
1298 _IOR(NVGPU_CTXSW_IOCTL_MAGIC, 5, struct nvgpu_ctxsw_trace_filter_args)
1299#define NVGPU_CTXSW_IOCTL_POLL \
1300 _IO(NVGPU_CTXSW_IOCTL_MAGIC, 6)
1301
/* Command number of the last ioctl defined above (that of POLL, i.e. 6). */
1302#define NVGPU_CTXSW_IOCTL_LAST \
1303 _IOC_NR(NVGPU_CTXSW_IOCTL_POLL)
1304
/* Size of the filter argument struct, which is larger than the only other argument struct used above (nvgpu_ctxsw_ring_setup_args, a single __u32). */
1305#define NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE \
1306 sizeof(struct nvgpu_ctxsw_trace_filter_args)
1307
1218#endif 1308#endif