aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h93
1 files changed, 90 insertions, 3 deletions
diff --git a/nvdebug.h b/nvdebug.h
index b4ff0a4..cd0dc90 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -12,7 +12,20 @@
12 12
13 `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU 13 `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU
14 virtual address space for this context. All channels in a TSG point to the 14 virtual address space for this context. All channels in a TSG point to the
15 same GPU Instance Block. 15 same GPU Instance Block (?).
16
17 "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and
18 thereby which PBDMA will run the channel. Increasing values select
19 increasingly numbered PBDMA IDs serving the runlist. If the selector value
20 exceeds the number of PBDMAs on the runlist, the hardware will silently
21 reassign the channel to run on the first PBDMA as though RUNQUEUE_SELECTOR had
22 been set to 0. (In current hardware, this is used by SCG on the graphics
23 runlist only to determine which FE pipe should service a given channel. A
24 value of 0 targets the first FE pipe, which can process all FE driven engines:
25 Graphics, Compute, Inline2Memory, and TwoD. A value of 1 targets the second
26 FE pipe, which can only process Compute work. Note that GRCE work is allowed
27 on either runqueue." (NVIDIA) Note that it appears runqueue 1 is the default
28 for CUDA work on the Jetson Xavier.
16 29
17 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN 30 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN
18 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) 31 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID)
@@ -29,6 +42,19 @@
29*/ 42*/
30enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; 43enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1};
31enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; 44enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3};
45static inline char* target_to_text(enum INST_TARGET t) {
46 switch (t) {
47 case TARGET_VID_MEM:
48 return "VID_MEM";
49 case TARGET_SYS_MEM_COHERENT:
50 return "SYS_MEM_COHERENT";
51 case TARGET_SYS_MEM_NONCOHERENT:
52 return "SYS_MEM_NONCOHERENT";
53 default:
54 printk(KERN_WARNING "[nvdebug] Invalid aperture!\n");
55 return NULL;
56 }
57}
32 58
33struct runlist_chan { 59struct runlist_chan {
34// 0:63 60// 0:63
@@ -55,10 +81,10 @@ struct runlist_chan {
55 timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds 81 timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds
56 82
57 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG 83 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG
58 TSGID : identifier of the Timeslice group (overlays ENTRY_ID)
59 TSG_LENGTH : number of channels that are part of this timeslice group
60 TIMESLICE_SCALE : scale factor for the TSG's timeslice 84 TIMESLICE_SCALE : scale factor for the TSG's timeslice
61 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice 85 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice
86 TSG_LENGTH : number of channels that are part of this timeslice group
87 TSGID : identifier of the Timeslice group (overlays ENTRY_ID)
62*/ 88*/
63struct entry_tsg { 89struct entry_tsg {
64// 0:63 90// 0:63
@@ -130,6 +156,52 @@ typedef union {
130 uint32_t raw; 156 uint32_t raw;
131} runlist_info_t; 157} runlist_info_t;
132 158
// Hardware-defined channel scheduling states, as read back from the
// per-channel control/status register.  The numeric values are fixed by
// the GPU and must not be changed.
enum CHANNEL_STATUS {
	CHANNEL_STATUS_IDLE                           = 0,
	CHANNEL_STATUS_PENDING                        = 1,
	CHANNEL_STATUS_PENDING_CTX_RELOAD             = 2,
	CHANNEL_STATUS_PENDING_ACQUIRE                = 3,
	CHANNEL_STATUS_PENDING_ACQ_CTX_RELOAD         = 4,
	CHANNEL_STATUS_ON_PBDMA                       = 5,
	CHANNEL_STATUS_ON_PBDMA_AND_ENG               = 6,
	CHANNEL_STATUS_ON_ENG                         = 7,
	CHANNEL_STATUS_ON_ENG_PENDING_ACQUIRE         = 8,
	CHANNEL_STATUS_ON_ENG_PENDING                 = 9,
	CHANNEL_STATUS_ON_PBDMA_CTX_RELOAD            = 10,
	CHANNEL_STATUS_ON_PBDMA_AND_ENG_CTX_RELOAD    = 11,
	CHANNEL_STATUS_ON_ENG_CTX_RELOAD              = 12,
	CHANNEL_STATUS_ON_ENG_PENDING_CTX_RELOAD      = 13,
	CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD  = 14,
};
176
177#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
178#define MAX_CHID 512 // TODO: Double-check this is right
179// There are a total of 512 possible channels
180typedef union {
181 struct {
182// 0:31
183 uint32_t inst_ptr:28;
184 enum INST_TARGET inst_target:2;
185 uint32_t padding0:1;
186 bool inst_bind:1;
187// 32:64
188 bool enable:1;
189 bool next:1;
190 uint32_t padding:6;
191 bool force_ctx_reload:1;
192 uint32_t padding2:1;
193 bool enable_set:1;
194 bool enable_clear:1;
195 uint32_t padding3:10;
196 bool pbdma_faulted:1;
197 bool eng_faulted:1;
198 enum CHANNEL_STATUS status:4;
199 bool busy:1;
200 uint32_t padding4:3;
201 } __attribute__((packed));
202 uint64_t raw;
203} channel_ctrl_t;
204
133// TODO(jbakita): Maybe put the above GPU types in a different file. 205// TODO(jbakita): Maybe put the above GPU types in a different file.
134 206
135#define for_chan_in_tsg(chan, tsg) \ 207#define for_chan_in_tsg(chan, tsg) \
@@ -146,6 +218,7 @@ struct runlist_iter {
146}; 218};
147 219
148// Defined in runlist.c 220// Defined in runlist.c
221struct gk20a* get_live_gk20a(void);
149int get_runlist_iter(struct runlist_iter *rl_iter); 222int get_runlist_iter(struct runlist_iter *rl_iter);
150 223
151static inline struct gk20a *get_gk20a(struct device *dev) { 224static inline struct gk20a *get_gk20a(struct device *dev) {
@@ -164,6 +237,20 @@ static inline u32 nvdebug_readl(struct gk20a* g, u32 r) {
164 return readl(g_os->regs + r); 237 return readl(g_os->regs + r);
165} 238}
166 239
240// quadword version of nvdebug_readl()
241static inline u64 nvdebug_readq(struct gk20a* g, u32 r) {
242 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g);
243 u64 ret;
244 if (unlikely(!g_os->regs)) {
245 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n");
246 return -1;
247 }
248 // readq seems to always return the uppermost 32 bits as 0, so workaround with readl
249 ret = readl(g_os->regs + r);
250 ret |= ((u64)readl(g_os->regs + r + 4)) << 32;
251 return ret;
252}
253
167// Functionally identical to nvgpu_writel() 254// Functionally identical to nvgpu_writel()
168static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { 255static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) {
169 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); 256 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g);