aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2021-08-26 13:04:27 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2021-08-26 13:04:27 -0400
commit5f661d8a5db3f7875f6bf36b4843a71fd08ecbea (patch)
treeb18ce3ceb27fd885cd6aec19a3c342bb9e7963ef /nvdebug.h
Add initial implementation
Supports accessing and printing the runlist on the Jetson Xavier to dmesg. May work on other Jetson boards. Currently requires the nvgpu headers from NVIDIA's Linux4Tegra (L4T) source tree.
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h127
1 files changed, 127 insertions, 0 deletions
diff --git a/nvdebug.h b/nvdebug.h
new file mode 100644
index 0000000..aa5d0cf
--- /dev/null
+++ b/nvdebug.h
@@ -0,0 +1,127 @@
1/* Copyright 2021 Joshua Bakita
2 * SPDX-License-Identifier: MIT
3 */
4
5/* Runlist Channel
6 A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue
7 of GPU commands. These commands are typically queued from userspace.
8
9 `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU
10 virtual address space for this context. All channels in a TSG point to the
11 same GPU Instance Block.
12
13 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN
14 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID)
15 RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if
16 more than one PBDMA is supported by the runlist
17
18 INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer
19 INST_PTR_HI : upper 32 bits of the instance block pointer
20 INST_TARGET (TGI) : aperture of the instance block
21
22 USERD_PTR_LO : upper 24 bits of the low 32 bits of the 512-byte-aligned USERD pointer
23 USERD_PTR_HI : upper 32 bits of USERD pointer
24 USERD_TARGET (TGU) : aperture of the USERD data structure
25*/
/* Kind of runlist entry; carried in the leading 1-bit entry_type field of both
 * runlist entry layouts. */
enum ENTRY_TYPE {
	ENTRY_TYPE_CHAN = 0,
	ENTRY_TYPE_TSG = 1
};

/* Aperture encoding for 2-bit target fields. The value 1 is not given a name
 * here. */
enum INST_TARGET {
	TARGET_VID_MEM = 0,
	TARGET_SYS_MEM_COHERENT = 2,
	TARGET_SYS_MEM_NONCOHERENT = 3
};
28
29struct runlist_chan {
30// 0:63
31 enum ENTRY_TYPE entry_type:1;
32 uint32_t runqueue_selector:1;
33 uint32_t padding:2;
34 enum INST_TARGET inst_target:2;
35 uint32_t padding2:2;
36 uint32_t userd_ptr_lo:24;
37 uint32_t userd_ptr_hi:32;
38// 64:128
39 uint32_t chid:12;
40 uint32_t inst_ptr_lo:20;
41 uint32_t inst_ptr_hi:32;
42} __attribute__((packed));
43
44/* Runlist TSG (TimeSlice Group)
45 The runlist is composed of timeslice groups (TSG). Each TSG corresponds
46 to a single virtual address space on the GPU and contains `TSG_LENGTH`
47 channels. These channels and virtual address space are accessible to the GPU
48 host unit for use until the timeslice expires or a TSG switch is forcibly
49 initiated via a write to `NV_PFIFO_PREEMPT`.
50
51 timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds
52
53 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG
54 TSGID : identifier of the Timeslice group (overlays ENTRY_ID)
55 TSG_LENGTH : number of channels that are part of this timeslice group
56 TIMESLICE_SCALE : scale factor for the TSG's timeslice
57 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice
58*/
59struct entry_tsg {
60// 0:63
61 enum ENTRY_TYPE entry_type:1;
62 uint64_t padding:15;
63 uint32_t timeslice_scale:4;
64 uint64_t padding2:4;
65 uint32_t timeslice_timeout:8;
66 uint32_t tsg_length:8;
67 uint32_t padding3:24;
68// 64:128
69 uint32_t tsgid:12;
70 uint64_t padding4:52;
71} __attribute__((packed));
72
/* Tells whether the id in struct pfifo_preempt names a channel or a TSG. */
enum PREEMPT_TYPE {
	PREEMPT_TYPE_CHANNEL = 0,
	PREEMPT_TYPE_TSG = 1
};

/* Preempt
 *  ID/CHID : ID of the TSG or channel to preempt
 *
 * The bit-fields of struct pfifo_preempt add up to 32 bits.
 */
#define NV_PFIFO_PREEMPT 0x00002634
struct __attribute__((packed)) pfifo_preempt {
	uint32_t id:12;
	uint32_t padding:8;
	bool is_pending:1;
	uint32_t padding2:3;
	enum PREEMPT_TYPE type:2;
	uint32_t padding3:6;
};
87
/* Runlist preempt
 *  runlist_N : one flag bit per runlist, for runlists 0 through 13
 *
 * The 14 flag bits are followed by 18 bits of padding so that the whole
 * layout is exactly 32 bits, like the other register layouts in this file.
 * (A wider padding field would make the packed struct spill past 4 bytes.)
 */
#define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
struct __attribute__((packed)) runlist_preempt {
	bool runlist_0:1;
	bool runlist_1:1;
	bool runlist_2:1;
	bool runlist_3:1;
	bool runlist_4:1;
	bool runlist_5:1;
	bool runlist_6:1;
	bool runlist_7:1;
	bool runlist_8:1;
	bool runlist_9:1;
	bool runlist_10:1;
	bool runlist_11:1;
	bool runlist_12:1;
	bool runlist_13:1;
	uint32_t padding:18;	/* 14 + 18 = 32 bits */
};
106
/* Runlist base register: viewable either as three bit-fields (28 + 2 + 2 =
 * 32 bits) or as one raw 32-bit word.
 * Note: this differs on Turing.
 * NOTE(review): ptr and type presumably hold the runlist address and its
 * aperture -- confirm encoding against the hardware manual.
 */
#define NV_PFIFO_RUNLIST_BASE 0x00002270
typedef union {
	struct __attribute__((packed)) {
		uint32_t ptr:28;
		uint32_t type:2;
		uint32_t padding:2;
	};
	uint32_t raw;	/* whole register as a single word */
} runlist_base_t;
117
/* Runlist register: viewable either as bit-fields (16 + 4 + 4 + 8 = 32 bits)
 * or as one raw 32-bit word. */
#define NV_PFIFO_RUNLIST 0x00002274
typedef union {
	struct __attribute__((packed)) {
		uint32_t len:16;
		uint32_t padding:4;
		uint32_t id:4;
		uint32_t padding2:8;
	};
	uint32_t raw;	/* whole register as a single word */
} runlist_info_t;