aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h116
1 files changed, 83 insertions, 33 deletions
diff --git a/nvdebug.h b/nvdebug.h
index 2fc8c63..f65b403 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -131,8 +131,8 @@ struct gm107_runlist_chan {
131 GPU instance addresses with Volta. 131 GPU instance addresses with Volta.
132*/ 132*/
133 133
134// Support: Volta, Ampere*, Turing* 134// Support: Volta, Turing*, Ampere*
135// *These treat the top 8 bits of TSGID as GFID (unused) 135// *These treat bits 4:11 (8 bits) as GFID (unused)
136struct gv100_runlist_tsg { 136struct gv100_runlist_tsg {
137// 0:63 137// 0:63
138 enum ENTRY_TYPE entry_type:1; 138 enum ENTRY_TYPE entry_type:1;
@@ -166,7 +166,7 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1};
166 166
167/* Preempt a TSG or Channel by ID 167/* Preempt a TSG or Channel by ID
168 ID/CHID : Id of TSG or channel to preempt 168 ID/CHID : Id of TSG or channel to preempt
169 IS_PENDING : Is a context switch pending? 169 IS_PENDING : Is a context switch pending? (read-only)
170 TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG 170 TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG
171 171
172 Support: Kepler, Maxwell, Pascal, Volta, Turing 172 Support: Kepler, Maxwell, Pascal, Volta, Turing
@@ -201,7 +201,7 @@ typedef union {
201 rl_preempt.raw |= BIT(nr); 201 rl_preempt.raw |= BIT(nr);
202 nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); 202 nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
203 203
204 Support: Volta 204 Support: Volta, Turing
205*/ 205*/
206#define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 206#define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
207typedef union { 207typedef union {
@@ -255,39 +255,83 @@ typedef union {
255 * cause a system to hang/stop responding." 255 * cause a system to hang/stop responding."
256 */ 256 */
257 257
258// Note: This is different with Turing 258/* Runlist Metadata (up through Volta)
259// Support: Fermi, Kepler, Maxwell, Pascal, Volta 259 "Software specifies the GPU contexts that hardware should "run" by writing a
260#define NV_PFIFO_RUNLIST_BASE 0x00002270 260 list of entries (known as a "runlist") to a 4k-aligned area of memory (beginning
261#define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8) 261 at NV_PFIFO_RUNLIST_BASE), and by notifying Host that a new list is available
262 (by writing to NV_PFIFO_RUNLIST).
263
264 Submission of a new runlist causes Host to expire the timeslice of all work
265 scheduled by the previous runlist, allowing it to schedule the channels present
266 in the new runlist once they are fetched. SW can check the status of the runlist
267 by polling NV_PFIFO_ENG_RUNLIST_PENDING. (see dev_fifo.ref NV_PFIFO_RUNLIST for
268 a full description of the runlist submit mechanism).
269
270 Runlists can be stored in system memory or video memory (as specified by
271 NV_PFIFO_RUNLIST_BASE_TARGET). If a runlist is stored in video memory, software
272 will have to execute flush or read the last entry written before submitting the
273 runlist to Host to guarantee coherency." (volta/dev_ram.ref.txt)
274
275 We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where
276 i is a runlist index). Runlists are configured via the separate, writable
277 *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that.
278
279 LEN : Number of entries in runlist
280 IS_PENDING : Is runlist not yet committed?
281 PTR : Pointer to start of 4k-aligned runlist (upper 28 of 40 bits)
282 TARGET : Aperture of runlist (video or system memory)
283
284 Support: Fermi*, Kepler, Maxwell, Pascal, Volta
285 *Fermi may expose this information 8 bytes earlier, starting at 0x227C?
286*/
287#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only
262typedef union { 288typedef union {
263 struct { 289 struct {
290 // NV_PFIFO_ENG_RUNLIST_BASE_* fields
264 uint32_t ptr:28; 291 uint32_t ptr:28;
265 enum INST_TARGET target:2; 292 enum INST_TARGET target:2;
266 uint32_t padding:2; 293 uint32_t padding1:2;
294 // NV_PFIFO_ENG_RUNLIST_* fields
295 uint16_t len:16;
296 uint32_t padding2:4;
297 bool is_pending:1;
298 uint32_t padding3:11;
267 } __attribute__((packed)); 299 } __attribute__((packed));
268 uint32_t raw; 300 uint64_t raw;
269} runlist_base_t; 301} eng_runlist_gf100_t;
270 302
271// Support: Kepler, Maxwell, Pascal, Volta 303/*
272// Works on Fermi, but id is one bit longer and is b11111 304 Starting with Turing, the separate registers for reading and writing runlist
273#define NV_PFIFO_RUNLIST 0x00002274 305 configuration were dropped in favor of read/write indexed registers. As part
274#define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8) 306 of this, the layout was modified to allow for larger runlist pointers (upper
307 52 of 64 bits).
308
309 Support: Turing, Ampere, Lovelace?, Hopper?
310*/
311// Support: Turing
312#define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write
313#define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write
275typedef union { 314typedef union {
276 // RUNLIST fields
277 struct { 315 struct {
278 uint32_t len:16; 316 enum INST_TARGET target:2;
279 uint32_t padding:4; 317 uint32_t padding:10;
280 uint32_t id:4; // Runlist ID (each engine may have a seperate runlist) 318 uint64_t ptr:28;
281 uint32_t padding2:8; 319 uint32_t padding2:24;
282 } __attribute__((packed)); 320 } __attribute__((packed));
283 // ENG_RUNLIST fields that differ 321 uint64_t raw;
322} runlist_base_tu102_t;
323
324typedef union {
284 struct { 325 struct {
285 uint32_t padding3:20; 326 uint16_t len:16;
286 bool is_pending:1; // Is runlist not yet committed? 327 uint16_t offset:16;
287 uint32_t padding4:11; 328 uint32_t preempted_tsgid:14;
329 bool valid_preempted_tsgid:1;
330 bool is_pending:1;
331 uint32_t preempted_offset:16;
288 } __attribute__((packed)); 332 } __attribute__((packed));
289 uint32_t raw; 333 uint64_t raw;
290} runlist_info_t; 334} runlist_submit_tu102_t;
291 335
292enum CHANNEL_STATUS { 336enum CHANNEL_STATUS {
293 CHANNEL_STATUS_IDLE = 0, 337 CHANNEL_STATUS_IDLE = 0,
@@ -307,8 +351,13 @@ enum CHANNEL_STATUS {
307 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, 351 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14,
308}; 352};
309 353
354/* Programmable Channel Control System RAM (PCCSR)
355
356 512-entry array of channel control and status data structures.
357
358 Support: Fermi, Maxwell, Pascal, Volta, Turing, [more?]
359*/
310#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) 360#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
311// There are a total of 512 possible channels
312#define MAX_CHID 512 361#define MAX_CHID 512
313typedef union { 362typedef union {
314 struct { 363 struct {
@@ -1023,12 +1072,12 @@ VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length);
1023struct runlist_iter { 1072struct runlist_iter {
1024 // Pointer to either a TSG or channel entry (they're the same size) 1073 // Pointer to either a TSG or channel entry (they're the same size)
1025 void *curr_entry; 1074 void *curr_entry;
1026 // This should be set to tsg_length when a TSG is reached, and 1075 // This should be set to tsg_length + 1 when a TSG is reached, and
1027 // decremented as each subsequent channel is printed. This allows us to 1076 // decremented each time _next() is called. This allows us to
1028 // track which channel are and are not part of the TSG. 1077 // track which channels are and are not part of the TSG.
1029 int channels_left_in_tsg; 1078 int entries_left_in_tsg;
1030 // Total runlist length, etc 1079 // Number of entries in runlist
1031 runlist_info_t rl_info; 1080 int len;
1032}; 1081};
1033 1082
1034#define NVDEBUG_MAX_DEVICES 8 1083#define NVDEBUG_MAX_DEVICES 8
@@ -1037,6 +1086,7 @@ extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
1037// Defined in runlist.c 1086// Defined in runlist.c
1038int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); 1087int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter);
1039int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); 1088int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id);
1089int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id);
1040 1090
1041// Defined in mmu.c 1091// Defined in mmu.c
1042uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); 1092uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr);