diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 93 |
1 files changed, 90 insertions, 3 deletions
@@ -12,7 +12,20 @@ | |||
12 | 12 | ||
13 | `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU | 13 | `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU |
14 | virtual address space for this context. All channels in a TSG point to the | 14 | virtual address space for this context. All channels in a TSG point to the |
15 | same GPU Instance Block. | 15 | same GPU Instance Block (?). |
16 | |||
17 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and | ||
18 | thereby which PBDMA will run the channel. Increasing values select | ||
19 | increasingly numbered PBDMA IDs serving the runlist. If the selector value | ||
20 | exceeds the number of PBDMAs on the runlist, the hardware will silently | ||
21 | reassign the channel to run on the first PBDMA as though RUNQUEUE_SELECTOR had | ||
22 | been set to 0. (In current hardware, this is used by SCG on the graphics | ||
23 | runlist only to determine which FE pipe should service a given channel. A | ||
24 | value of 0 targets the first FE pipe, which can process all FE driven engines: | ||
25 | Graphics, Compute, Inline2Memory, and TwoD. A value of 1 targets the second | ||
26 | FE pipe, which can only process Compute work. Note that GRCE work is allowed | ||
27 | on either runqueue." (NVIDIA) Note that it appears runqueue 1 is the default | ||
28 | for CUDA work on the Jetson Xavier. | ||
16 | 29 | ||
17 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN | 30 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN |
18 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) | 31 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) |
@@ -29,6 +42,19 @@ | |||
29 | */ | 42 | */ |
30 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 43 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
31 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 44 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
45 | static inline char* target_to_text(enum INST_TARGET t) { | ||
46 | switch (t) { | ||
47 | case TARGET_VID_MEM: | ||
48 | return "VID_MEM"; | ||
49 | case TARGET_SYS_MEM_COHERENT: | ||
50 | return "SYS_MEM_COHERENT"; | ||
51 | case TARGET_SYS_MEM_NONCOHERENT: | ||
52 | return "SYS_MEM_NONCOHERENT"; | ||
53 | default: | ||
54 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | ||
55 | return NULL; | ||
56 | } | ||
57 | } | ||
32 | 58 | ||
33 | struct runlist_chan { | 59 | struct runlist_chan { |
34 | // 0:63 | 60 | // 0:63 |
@@ -55,10 +81,10 @@ struct runlist_chan { | |||
55 | timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds | 81 | timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds |
56 | 82 | ||
57 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG | 83 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG |
58 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) | ||
59 | TSG_LENGTH : number of channels that are part of this timeslice group | ||
60 | TIMESLICE_SCALE : scale factor for the TSG's timeslice | 84 | TIMESLICE_SCALE : scale factor for the TSG's timeslice |
61 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice | 85 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice |
86 | TSG_LENGTH : number of channels that are part of this timeslice group | ||
87 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) | ||
62 | */ | 88 | */ |
63 | struct entry_tsg { | 89 | struct entry_tsg { |
64 | // 0:63 | 90 | // 0:63 |
@@ -130,6 +156,52 @@ typedef union { | |||
130 | uint32_t raw; | 156 | uint32_t raw; |
131 | } runlist_info_t; | 157 | } runlist_info_t; |
132 | 158 | ||
/* Channel status field values, as read back from the per-channel CCSR
 * control/status register (see channel_ctrl_t.status below).
 *
 * The states encode where the channel currently lives (idle, loaded on a
 * PBDMA, loaded on an engine, or both) combined with pending conditions
 * (semaphore/context acquire, context reload).
 *
 * NOTE(review): names and numeric values appear to mirror NVIDIA's
 * NV_PCCSR_CHANNEL_STATUS_* definitions — confirm against the target
 * chip's hardware reference manuals.
 */
enum CHANNEL_STATUS {
	CHANNEL_STATUS_IDLE = 0,
	CHANNEL_STATUS_PENDING = 1,
	CHANNEL_STATUS_PENDING_CTX_RELOAD = 2,
	CHANNEL_STATUS_PENDING_ACQUIRE = 3,
	CHANNEL_STATUS_PENDING_ACQ_CTX_RELOAD = 4,
	CHANNEL_STATUS_ON_PBDMA = 5,
	CHANNEL_STATUS_ON_PBDMA_AND_ENG = 6,
	CHANNEL_STATUS_ON_ENG = 7,
	CHANNEL_STATUS_ON_ENG_PENDING_ACQUIRE = 8,
	CHANNEL_STATUS_ON_ENG_PENDING = 9,
	CHANNEL_STATUS_ON_PBDMA_CTX_RELOAD = 10,
	CHANNEL_STATUS_ON_PBDMA_AND_ENG_CTX_RELOAD = 11,
	CHANNEL_STATUS_ON_ENG_CTX_RELOAD = 12,
	CHANNEL_STATUS_ON_ENG_PENDING_CTX_RELOAD = 13,
	CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14,
};
176 | |||
// BAR0 offset of the channel control/status (CCSR) register pair for
// channel i. The *8 stride implies each channel owns two 32-bit
// registers, matching the 64-bit layout of channel_ctrl_t below.
#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
#define MAX_CHID 512 // TODO: Double-check this is right
// There are a total of 512 possible channels
//
// 64-bit view of a channel's CCSR register pair. Access via .raw (e.g.
// with nvdebug_readq()) or through the packed bitfields.
// NOTE(review): bitfield ordering assumes the compiler allocates bits
// LSB-first within each word (GCC on little-endian) — do not reorder.
typedef union {
	struct {
		// 0:31
		// Instance block pointer; presumably the upper 28 bits of a
		// 4KiB-aligned address — TODO confirm shift against hw docs.
		uint32_t inst_ptr:28;
		// Aperture the instance block lives in (see INST_TARGET).
		enum INST_TARGET inst_target:2;
		uint32_t padding0:1;
		// Set when inst_ptr has been bound to this channel.
		bool inst_bind:1;
		// 32:64
		bool enable:1;
		bool next:1;
		uint32_t padding:6;
		bool force_ctx_reload:1;
		uint32_t padding2:1;
		// Write-1-to-set / write-1-to-clear views of `enable`.
		bool enable_set:1;
		bool enable_clear:1;
		uint32_t padding3:10;
		bool pbdma_faulted:1;
		bool eng_faulted:1;
		// Scheduling state of the channel (see enum CHANNEL_STATUS).
		enum CHANNEL_STATUS status:4;
		bool busy:1;
		uint32_t padding4:3;
	} __attribute__((packed));
	uint64_t raw;
} channel_ctrl_t;
204 | |||
133 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 205 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
134 | 206 | ||
135 | #define for_chan_in_tsg(chan, tsg) \ | 207 | #define for_chan_in_tsg(chan, tsg) \ |
@@ -146,6 +218,7 @@ struct runlist_iter { | |||
146 | }; | 218 | }; |
147 | 219 | ||
148 | // Defined in runlist.c | 220 | // Defined in runlist.c |
221 | struct gk20a* get_live_gk20a(void); | ||
149 | int get_runlist_iter(struct runlist_iter *rl_iter); | 222 | int get_runlist_iter(struct runlist_iter *rl_iter); |
150 | 223 | ||
151 | static inline struct gk20a *get_gk20a(struct device *dev) { | 224 | static inline struct gk20a *get_gk20a(struct device *dev) { |
@@ -164,6 +237,20 @@ static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { | |||
164 | return readl(g_os->regs + r); | 237 | return readl(g_os->regs + r); |
165 | } | 238 | } |
166 | 239 | ||
240 | // quadword version of nvdebug_readl() | ||
241 | static inline u64 nvdebug_readq(struct gk20a* g, u32 r) { | ||
242 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | ||
243 | u64 ret; | ||
244 | if (unlikely(!g_os->regs)) { | ||
245 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | ||
246 | return -1; | ||
247 | } | ||
248 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | ||
249 | ret = readl(g_os->regs + r); | ||
250 | ret |= ((u64)readl(g_os->regs + r + 4)) << 32; | ||
251 | return ret; | ||
252 | } | ||
253 | |||
167 | // Functionally identical to nvgpu_writel() | 254 | // Functionally identical to nvgpu_writel() |
168 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { | 255 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { |
169 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 256 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); |