diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2021-08-26 13:04:27 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2021-08-26 13:04:27 -0400 |
commit | 5f661d8a5db3f7875f6bf36b4843a71fd08ecbea (patch) | |
tree | b18ce3ceb27fd885cd6aec19a3c342bb9e7963ef /nvdebug.h |
Add initial implementation
Supports accessing and printing the runlist on the Jetson Xavier to
dmesg. May work on other Jetson boards. Currently requires the nvgpu
headers from NVIDIA's Linux4Tegra (L4T) source tree.
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/nvdebug.h b/nvdebug.h new file mode 100644 index 0000000..aa5d0cf --- /dev/null +++ b/nvdebug.h | |||
@@ -0,0 +1,127 @@ | |||
1 | /* Copyright 2021 Joshua Bakita | ||
2 | * SPDX-License-Identifier: MIT | ||
3 | */ | ||
4 | |||
5 | /* Runlist Channel | ||
6 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue | ||
7 | of GPU commands. These commands are typically queued from userspace. | ||
8 | |||
9 | `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU | ||
10 | virtual address space for this context. All channels in a TSG point to the | ||
11 | same GPU Instance Block. | ||
12 | |||
13 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN | ||
14 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) | ||
15 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if | ||
16 | more than one PBDMA is supported by the runlist | ||
17 | |||
18 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer | ||
19 | INST_PTR_HI : upper 32 bits of the instance block pointer | ||
20 | INST_TARGET (TGI) : aperture of the instance block | ||
21 | |||
22 | USERD_PTR_LO : upper 24 bits of the low 32 bits of the 512-byte-aligned USERD pointer | ||
23 | USERD_PTR_HI : upper 32 bits of USERD pointer | ||
24 | USERD_TARGET (TGU) : aperture of the USERD data structure | ||
25 | */ | ||
26 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; /* Value of the 1-bit entry_type member that leads both struct runlist_chan and struct entry_tsg */ | ||
27 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; /* Aperture encoding stored in the 2-bit inst_target member; value 1 is not given a name here */ | ||
28 | |||
29 | struct runlist_chan { /* 128-bit channel entry as two 64-bit halves; members are declared in layout order, so do not reorder them */ | ||
30 | // Bits 0:63 | ||
31 | enum ENTRY_TYPE entry_type:1; /* ENTRY_TYPE_CHAN for this layout */ | ||
32 | uint32_t runqueue_selector:1; | ||
33 | uint32_t padding:2; | ||
34 | enum INST_TARGET inst_target:2; | ||
35 | uint32_t padding2:2; /* NOTE(review): the comment before this struct lists USERD_TARGET but no member carries it; presumably it occupies these bits - confirm against NVIDIA's runlist entry layout */ | ||
36 | uint32_t userd_ptr_lo:24; | ||
37 | uint32_t userd_ptr_hi:32; | ||
38 | // Bits 64:127 | ||
39 | uint32_t chid:12; | ||
40 | uint32_t inst_ptr_lo:20; | ||
41 | uint32_t inst_ptr_hi:32; | ||
42 | } __attribute__((packed)); | ||
43 | |||
44 | /* Runlist TSG (TimeSlice Group) | ||
45 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds | ||
46 | to a single virtual address space on the GPU and contains `TSG_LENGTH` | ||
47 | channels. These channels and virtual address space are accessible to the GPU | ||
48 | host unit for use until the timeslice expires or a TSG switch is forcibly | ||
49 | initiated via a write to `NV_PFIFO_PREEMPT`. | ||
50 | |||
51 | timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds | ||
52 | |||
53 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG | ||
54 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) | ||
55 | TSG_LENGTH : number of channels that are part of this timeslice group | ||
56 | TIMESLICE_SCALE : scale factor for the TSG's timeslice | ||
57 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice | ||
58 | */ | ||
59 | struct entry_tsg { /* 128-bit TSG entry as two 64-bit halves; members are declared in layout order, so do not reorder them */ | ||
60 | // Bits 0:63 | ||
61 | enum ENTRY_TYPE entry_type:1; /* ENTRY_TYPE_TSG for this layout */ | ||
62 | uint64_t padding:15; | ||
63 | uint32_t timeslice_scale:4; /* shift amount in the timeslice formula given in the comment before this struct */ | ||
64 | uint64_t padding2:4; | ||
65 | uint32_t timeslice_timeout:8; /* value that the formula shifts left by timeslice_scale */ | ||
66 | uint32_t tsg_length:8; /* number of channels that are part of this TSG */ | ||
67 | uint32_t padding3:24; | ||
68 | // Bits 64:127 | ||
69 | uint32_t tsgid:12; | ||
70 | uint64_t padding4:52; | ||
71 | } __attribute__((packed)); | ||
72 | |||
73 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; /* Value of the type member of struct pfifo_preempt: whether its id names a channel or a TSG */ | ||
74 | |||
75 | /* Preempt | ||
76 | ID/CHID : ID of the TSG or channel to preempt | ||
77 | */ | ||
78 | #define NV_PFIFO_PREEMPT 0x00002634 /* NOTE(review): presumably the register's byte offset in GPU register space - confirm */ | ||
79 | struct pfifo_preempt { /* Bit-field view of the preempt word; widths sum to 32 bits (12 + 8 + 1 + 3 + 2 + 6) */ | ||
80 | uint32_t id:12; /* ID of the TSG or channel to preempt */ | ||
81 | uint32_t padding:8; | ||
82 | bool is_pending:1; /* NOTE(review): presumably reads as set while a preempt is still in flight - confirm */ | ||
83 | uint32_t padding2:3; | ||
84 | enum PREEMPT_TYPE type:2; /* PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG; 2 bits wide although only 0 and 1 are named */ | ||
85 | uint32_t padding3:6; | ||
86 | } __attribute__((packed)); | ||
87 | |||
/* Runlist Preempt
   Bit-field view of the 32-bit word at NV_PFIFO_RUNLIST_PREEMPT: one flag per
   runlist, `runlist_0` declared first, followed by padding out to 32 bits.

   NOTE(review): presumably setting flag N requests preemption of runlist N and
   the flag reads back while that preempt is pending - confirm against the
   hardware reference.

   RUNLIST_0..RUNLIST_13 : per-runlist flag
*/
#define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
struct runlist_preempt {
	bool runlist_0:1;
	bool runlist_1:1;
	bool runlist_2:1;
	bool runlist_3:1;
	bool runlist_4:1;
	bool runlist_5:1;
	bool runlist_6:1;
	bool runlist_7:1;
	bool runlist_8:1;
	bool runlist_9:1;
	bool runlist_10:1;
	bool runlist_11:1;
	bool runlist_12:1;
	bool runlist_13:1;
	// 14 flag bits + 18 padding bits = 32. A width of 28 here would make the
	// packed struct 42 bits (6 bytes), wider than the 4-byte slot it describes.
	uint32_t padding:18;
} __attribute__((packed));
106 | |||
107 | // Note: This differs on Turing | ||
108 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | ||
109 | typedef union { /* Two views of one 32-bit value: the named bit-fields below, or the whole word through raw */ | ||
110 | struct { | ||
111 | uint32_t ptr:28; /* NOTE(review): presumably the runlist's address in some aligned unit - confirm units and alignment */ | ||
112 | uint32_t type:2; /* NOTE(review): possibly an aperture encoded as in enum INST_TARGET - confirm */ | ||
113 | uint32_t padding:2; | ||
114 | } __attribute__((packed)); | ||
115 | uint32_t raw; | ||
116 | } runlist_base_t; | ||
117 | |||
118 | #define NV_PFIFO_RUNLIST 0x00002274 | ||
119 | typedef union { /* Two views of one 32-bit value: the named bit-fields below, or the whole word through raw */ | ||
120 | struct { | ||
121 | uint32_t len:16; /* NOTE(review): presumably the number of entries in the runlist - confirm */ | ||
122 | uint32_t padding:4; | ||
123 | uint32_t id:4; /* NOTE(review): presumably the ID of the runlist this word describes - confirm */ | ||
124 | uint32_t padding2:8; | ||
125 | } __attribute__((packed)); | ||
126 | uint32_t raw; | ||
127 | } runlist_info_t; | ||