diff options
author | Anton Vorontsov <avorontsov@nvidia.com> | 2015-08-19 17:27:51 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-03-23 10:48:47 -0400 |
commit | 1c40d09c4c9c011c1318c328c0b4b6b17d1f537e (patch) | |
tree | 8b93fcd00739f9ada9302f06175278c9cb1d6785 /include/uapi | |
parent | 82da6ed595a87c8a3038eecd75880ab21dd4c5de (diff) |
gpu: nvgpu: Add support for FECS ctxsw tracing
bug 1648908
This commit adds support for FECS ctxsw tracing. Code is compiled
conditionally under CONFIG_GK20_CTXSW_TRACE.
This feature requires an updated FECS ucode that writes one record to a ring
buffer on each context switch. On RM/Kernel side, the GPU driver reads records
from the master ring buffer and generates trace entries into a user-facing
VM ring buffer. For each record in the master ring buffer, RM/Kernel has
to retrieve the vmid+pid of the user process that submitted related work.
Features currently implemented:
- master ring buffer allocation
- debugfs to dump master ring buffer
- FECS record per context switch (with both current and new contexts)
- dedicated device for ctxsw tracing (access to VM ring buffer)
- SOF generation (and access to PTIMER)
- VM ring buffer allocation, and reconfiguration
- enable/disable tracing at user level
- event-based trace filtering
- context_ptr to vmid+pid mapping
- read system call for ctxsw dev
- mmap system call for ctxsw dev (direct access to VM ring buffer)
- poll system call for ctxsw dev
- save/restore register on ELPG/CG6
- separate user ring from FECS ring handling
Features requiring ucode changes:
- enable/disable tracing at FECS level
- actual busy time on engine (bug 1642354)
- master ring buffer threshold interrupt (P1)
- API for GPU to CPU timestamp conversion (P1)
- vmid/pid/uid based filtering (P1)
Change-Id: I8e39c648221ee0fa09d5df8524b03dca83fe24f3
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1022737
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'include/uapi')
-rw-r--r-- | include/uapi/linux/nvgpu.h | 90 |
1 files changed, 90 insertions, 0 deletions
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 0c8de87f..64ac45b5 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -1215,4 +1215,94 @@ struct nvgpu_as_map_buffer_batch_args { | |||
1215 | #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ | 1215 | #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ |
1216 | sizeof(struct nvgpu_as_map_buffer_ex_args) | 1216 | sizeof(struct nvgpu_as_map_buffer_ex_args) |
1217 | 1217 | ||
1218 | |||
1219 | /* | ||
1220 | * /dev/nvhost-ctxsw-gpu device | ||
1221 | * | ||
1222 | * Opening a '/dev/nvhost-ctxsw-gpu' device node creates a way to trace | ||
1223 | * context switches on GR engine | ||
1224 | */ | ||
1225 | |||
1226 | #define NVGPU_CTXSW_IOCTL_MAGIC 'C' /* ioctl type character passed to _IO/_IOR/_IOW/_IOWR by every NVGPU_CTXSW_IOCTL_* code */ | ||
1227 | |||
1228 | #define NVGPU_CTXSW_TAG_SOF 0x00 /* first of the event tags; presumably the values stored in nvgpu_ctxsw_trace_entry.tag -- confirm */ | ||
1229 | #define NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01 | ||
1230 | #define NVGPU_CTXSW_TAG_FE_ACK 0x02 | ||
1231 | #define NVGPU_CTXSW_TAG_FE_ACK_WFI 0x0a /* the four FE_ACK_* variants use 0x0a-0x0d, outside the 0x00-0x05 run */ | ||
1232 | #define NVGPU_CTXSW_TAG_FE_ACK_GFXP 0x0b | ||
1233 | #define NVGPU_CTXSW_TAG_FE_ACK_CTAP 0x0c | ||
1234 | #define NVGPU_CTXSW_TAG_FE_ACK_CILP 0x0d | ||
1235 | #define NVGPU_CTXSW_TAG_SAVE_END 0x03 | ||
1236 | #define NVGPU_CTXSW_TAG_RESTORE_START 0x04 | ||
1237 | #define NVGPU_CTXSW_TAG_CONTEXT_START 0x05 | ||
1238 | #define NVGPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff /* 0xff is the largest value an 8-bit tag can hold */ | ||
1239 | #define NVGPU_CTXSW_TAG_LAST \ | ||
1240 | NVGPU_CTXSW_TAG_INVALID_TIMESTAMP /* highest tag value; NVGPU_CTXSW_FILTER_SIZE is defined as this + 1 */ | ||
1241 | |||
1242 | struct nvgpu_ctxsw_trace_entry { /* one trace record; the fields add up to 24 bytes when naturally aligned */ | ||
1243 | __u8 tag; /* presumably one of NVGPU_CTXSW_TAG_* -- confirm against the driver */ | ||
1244 | __u8 vmid; /* VM id; NOTE(review): presumably that of the process that submitted the work -- confirm */ | ||
1245 | __u16 seqno; /* sequence number; a gap between consecutive entries indicates dropped records */ | ||
1246 | __u32 context_id; /* context_id as allocated by FECS */ | ||
1247 | __u64 pid; /* process id; 64 bits wide to fit the widest pid type of any supported OS */ | ||
1248 | __u64 timestamp; /* 64-bit timestamp; the time base is not specified in this header */ | ||
1249 | }; | ||
1250 | |||
1251 | #define NVGPU_CTXSW_RING_HEADER_MAGIC 0x7000fade /* presumably the expected value of nvgpu_ctxsw_ring_header.magic -- confirm */ | ||
1252 | #define NVGPU_CTXSW_RING_HEADER_VERSION 0 /* presumably the expected value of nvgpu_ctxsw_ring_header.version -- confirm */ | ||
1253 | |||
1254 | struct nvgpu_ctxsw_ring_header { /* ring buffer header; the ring size given to RING_SETUP includes it */ | ||
1255 | __u32 magic; | ||
1256 | __u32 version; | ||
1257 | __u32 num_ents; /* NOTE(review): looks like the number of entries in the ring -- confirm */ | ||
1258 | __u32 ent_size; /* NOTE(review): looks like the size of one entry in bytes -- confirm */ | ||
1259 | volatile __u32 drop_count; /* dropped events, excluding those removed by the filter */ | ||
1260 | volatile __u32 write_seqno; /* the four volatile fields are re-read on every access; */ | ||
1261 | volatile __u32 write_idx; /* NOTE(review): presumably because producer and consumer update them */ | ||
1262 | volatile __u32 read_idx; /* concurrently through the shared ring buffer -- confirm */ | ||
1263 | }; | ||
1264 | |||
1265 | struct nvgpu_ctxsw_ring_setup_args { /* argument of NVGPU_CTXSW_IOCTL_RING_SETUP */ | ||
1266 | __u32 size; /* [in/out] size of the ring buffer in bytes (including the | ||
1267 | header). The requested size is rounded to the page size; on | ||
1268 | return this field holds the size actually allocated. */ | ||
1269 | }; | ||
1270 | |||
/*
 * Helpers for the per-tag bitmap held in struct nvgpu_ctxsw_trace_filter.
 *
 * n is a tag value (0 .. NVGPU_CTXSW_TAG_LAST) and p points to an object
 * with a 64-bit-word array member named tag_bits.  Tag n lives in word
 * n / 64, at bit n % 64.
 *
 * The mask is built from 1ULL rather than the int constant 1: shifting an
 * int by 31 or more positions is undefined behaviour, so an int mask cannot
 * address bits 31..63 of a word.
 *
 * NVGPU_CTXSW_FILTER_ISSET evaluates to 0 or 1 so that the result remains
 * correct when stored in a type narrower than 64 bits.
 *
 * The *_ALL helpers expand to memset(), which the includer must declare.
 * Arguments may be evaluated more than once; avoid side effects in n and p.
 */
#define NVGPU_CTXSW_FILTER_SIZE	(NVGPU_CTXSW_TAG_LAST + 1)
#define NVGPU_CTXSW_FILTER_SET(n, p) \
	((p)->tag_bits[(n) / 64] |= (1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_CLR(n, p) \
	((p)->tag_bits[(n) / 64] &= ~(1ULL << ((n) & 63)))
#define NVGPU_CTXSW_FILTER_ISSET(n, p) \
	(((p)->tag_bits[(n) / 64] & (1ULL << ((n) & 63))) != 0)
#define NVGPU_CTXSW_FILTER_CLR_ALL(p)	memset((void *)(p), 0, sizeof(*(p)))
#define NVGPU_CTXSW_FILTER_SET_ALL(p)	memset((void *)(p), ~0, sizeof(*(p)))
1280 | |||
1281 | struct nvgpu_ctxsw_trace_filter { /* bitmap with one bit per tag value, accessed through the NVGPU_CTXSW_FILTER_* macros */ | ||
1282 | __u64 tag_bits[(NVGPU_CTXSW_FILTER_SIZE + 63) / 64]; /* FILTER_SIZE bits rounded up to whole 64-bit words: 4 words for 256 tags */ | ||
1283 | }; | ||
1284 | |||
1285 | struct nvgpu_ctxsw_trace_filter_args { /* argument of NVGPU_CTXSW_IOCTL_SET_FILTER and NVGPU_CTXSW_IOCTL_GET_FILTER */ | ||
1286 | struct nvgpu_ctxsw_trace_filter filter; | ||
1287 | }; | ||
1288 | |||
1289 | #define NVGPU_CTXSW_IOCTL_TRACE_ENABLE \ | ||
1290 | _IO(NVGPU_CTXSW_IOCTL_MAGIC, 1) /* command 1, takes no argument */ | ||
1291 | #define NVGPU_CTXSW_IOCTL_TRACE_DISABLE \ | ||
1292 | _IO(NVGPU_CTXSW_IOCTL_MAGIC, 2) /* command 2, takes no argument */ | ||
1293 | #define NVGPU_CTXSW_IOCTL_RING_SETUP \ | ||
1294 | _IOWR(NVGPU_CTXSW_IOCTL_MAGIC, 3, struct nvgpu_ctxsw_ring_setup_args) /* command 3, argument is read and written back (size is [in/out]) */ | ||
1295 | #define NVGPU_CTXSW_IOCTL_SET_FILTER \ | ||
1296 | _IOW(NVGPU_CTXSW_IOCTL_MAGIC, 4, struct nvgpu_ctxsw_trace_filter_args) /* command 4, _IOW: argument passes from user space to the driver */ | ||
1297 | #define NVGPU_CTXSW_IOCTL_GET_FILTER \ | ||
1298 | _IOR(NVGPU_CTXSW_IOCTL_MAGIC, 5, struct nvgpu_ctxsw_trace_filter_args) /* command 5, _IOR: argument passes from the driver to user space */ | ||
1299 | #define NVGPU_CTXSW_IOCTL_POLL \ | ||
1300 | _IO(NVGPU_CTXSW_IOCTL_MAGIC, 6) /* command 6, takes no argument */ | ||
1301 | |||
1302 | #define NVGPU_CTXSW_IOCTL_LAST \ | ||
1303 | _IOC_NR(NVGPU_CTXSW_IOCTL_POLL) /* highest command number defined above (6) */ | ||
1304 | |||
1305 | #define NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE \ | ||
1306 | sizeof(struct nvgpu_ctxsw_trace_filter_args) /* the larger of the two argument structs used above: 32 bytes vs. 4 for ring_setup_args */ | ||
1307 | |||
1218 | #endif | 1308 | #endif |