diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 116 |
1 files changed, 83 insertions, 33 deletions
@@ -131,8 +131,8 @@ struct gm107_runlist_chan { | |||
131 | GPU instance addresses with Volta. | 131 | GPU instance addresses with Volta. |
132 | */ | 132 | */ |
133 | 133 | ||
134 | // Support: Volta, Ampere*, Turing* | 134 | // Support: Volta, Turing*, Ampere* |
135 | // *These treat the top 8 bits of TSGID as GFID (unused) | 135 | // *These treat bits 4:11 (8 bits) as GFID (unused) |
136 | struct gv100_runlist_tsg { | 136 | struct gv100_runlist_tsg { |
137 | // 0:63 | 137 | // 0:63 |
138 | enum ENTRY_TYPE entry_type:1; | 138 | enum ENTRY_TYPE entry_type:1; |
@@ -166,7 +166,7 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | |||
166 | 166 | ||
167 | /* Preempt a TSG or Channel by ID | 167 | /* Preempt a TSG or Channel by ID |
168 | ID/CHID : Id of TSG or channel to preempt | 168 | ID/CHID : Id of TSG or channel to preempt |
169 | IS_PENDING : Is a context switch pending? | 169 | IS_PENDING : Is a context switch pending? (read-only) |
170 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | 170 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG |
171 | 171 | ||
172 | Support: Kepler, Maxwell, Pascal, Volta, Turing | 172 | Support: Kepler, Maxwell, Pascal, Volta, Turing |
@@ -201,7 +201,7 @@ typedef union { | |||
201 | rl_preempt.raw |= BIT(nr); | 201 | rl_preempt.raw |= BIT(nr); |
202 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | 202 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); |
203 | 203 | ||
204 | Support: Volta | 204 | Support: Volta, Turing |
205 | */ | 205 | */ |
206 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 | 206 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 |
207 | typedef union { | 207 | typedef union { |
@@ -255,39 +255,83 @@ typedef union { | |||
255 | * cause a system to hang/stop responding." | 255 | * cause a system to hang/stop responding." |
256 | */ | 256 | */ |
257 | 257 | ||
258 | // Note: This is different with Turing | 258 | /* Runlist Metadata (up through Volta) |
259 | // Support: Fermi, Kepler, Maxwell, Pascal, Volta | 259 | "Software specifies the GPU contexts that hardware should "run" by writing a |
260 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | 260 | list of entries (known as a "runlist") to a 4k-aligned area of memory (beginning |
261 | #define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8) | 261 | at NV_PFIFO_RUNLIST_BASE), and by notifying Host that a new list is available |
262 | (by writing to NV_PFIFO_RUNLIST). | ||
263 | |||
264 | Submission of a new runlist causes Host to expire the timeslice of all work | ||
265 | scheduled by the previous runlist, allowing it to schedule the channels present | ||
266 | in the new runlist once they are fetched. SW can check the status of the runlist | ||
267 | by polling NV_PFIFO_ENG_RUNLIST_PENDING. (see dev_fifo.ref NV_PFIFO_RUNLIST for | ||
268 | a full description of the runlist submit mechanism). | ||
269 | |||
270 | Runlists can be stored in system memory or video memory (as specified by | ||
271 | NV_PFIFO_RUNLIST_BASE_TARGET). If a runlist is stored in video memory, software | ||
272 | will have to execute flush or read the last entry written before submitting the | ||
273 | runlist to Host to guarantee coherency." (volta/dev_ram.ref.txt) | ||
274 | |||
275 | We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where | ||
276 | i is a runlist index). Runlists are configured via the separate, writable | ||
277 | *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that. | ||
278 | |||
279 | LEN : Number of entries in runlist | ||
280 | IS_PENDING : Is runlist not yet committed? | ||
281 | PTR : Pointer to start of 4k-aligned runlist (upper 28 of 40 bits) | ||
282 | TARGET : Aperture of runlist (video or system memory) | ||
283 | |||
284 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta | ||
285 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? | ||
286 | */ | ||
287 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only | ||
262 | typedef union { | 288 | typedef union { |
263 | struct { | 289 | struct { |
290 | // NV_PFIFO_ENG_RUNLIST_BASE_* fields | ||
264 | uint32_t ptr:28; | 291 | uint32_t ptr:28; |
265 | enum INST_TARGET target:2; | 292 | enum INST_TARGET target:2; |
266 | uint32_t padding:2; | 293 | uint32_t padding1:2; |
294 | // NV_PFIFO_ENG_RUNLIST_* fields | ||
295 | uint16_t len:16; | ||
296 | uint32_t padding2:4; | ||
297 | bool is_pending:1; | ||
298 | uint32_t padding3:11; | ||
267 | } __attribute__((packed)); | 299 | } __attribute__((packed)); |
268 | uint32_t raw; | 300 | uint64_t raw; |
269 | } runlist_base_t; | 301 | } eng_runlist_gf100_t; |
270 | 302 | ||
271 | // Support: Kepler, Maxwell, Pascal, Volta | 303 | /* |
272 | // Works on Fermi, but id is one bit longer and is b11111 | 304 | Starting with Turing, the separate registers for reading and writing runlist |
273 | #define NV_PFIFO_RUNLIST 0x00002274 | 305 | configuration were dropped in favor of read/write indexed registers. As part |
274 | #define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8) | 306 | of this, the layout was modified to allow for larger runlist pointers (upper |
307 | 52 of 64 bits). | ||
308 | |||
309 | Support: Turing, Ampere, Lovelace?, Hopper? | ||
310 | */ | ||
311 | // Support: Turing | ||
312 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write | ||
313 | #define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write | ||
275 | typedef union { | 314 | typedef union { |
276 | // RUNLIST fields | ||
277 | struct { | 315 | struct { |
278 | uint32_t len:16; | 316 | enum INST_TARGET target:2; |
279 | uint32_t padding:4; | 317 | uint32_t padding:10; |
280 | uint32_t id:4; // Runlist ID (each engine may have a separate runlist) | 318 | uint64_t ptr:28; |
281 | uint32_t padding2:8; | 319 | uint32_t padding2:24; |
282 | } __attribute__((packed)); | 320 | } __attribute__((packed)); |
283 | // ENG_RUNLIST fields that differ | 321 | uint64_t raw; |
322 | } runlist_base_tu102_t; | ||
323 | |||
324 | typedef union { | ||
284 | struct { | 325 | struct { |
285 | uint32_t padding3:20; | 326 | uint16_t len:16; |
286 | bool is_pending:1; // Is runlist not yet committed? | 327 | uint16_t offset:16; |
287 | uint32_t padding4:11; | 328 | uint32_t preempted_tsgid:14; |
329 | bool valid_preempted_tsgid:1; | ||
330 | bool is_pending:1; | ||
331 | uint32_t preempted_offset:16; | ||
288 | } __attribute__((packed)); | 332 | } __attribute__((packed)); |
289 | uint32_t raw; | 333 | uint64_t raw; |
290 | } runlist_info_t; | 334 | } runlist_submit_tu102_t; |
291 | 335 | ||
292 | enum CHANNEL_STATUS { | 336 | enum CHANNEL_STATUS { |
293 | CHANNEL_STATUS_IDLE = 0, | 337 | CHANNEL_STATUS_IDLE = 0, |
@@ -307,8 +351,13 @@ enum CHANNEL_STATUS { | |||
307 | CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, | 351 | CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, |
308 | }; | 352 | }; |
309 | 353 | ||
354 | /* Programmable Channel Control System RAM (PCCSR) | ||
355 | |||
356 | 512-entry array of channel control and status data structures. | ||
357 | |||
358 | Support: Fermi, Maxwell, Pascal, Volta, Turing, [more?] | ||
359 | */ | ||
310 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 360 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
311 | // There are a total of 512 possible channels | ||
312 | #define MAX_CHID 512 | 361 | #define MAX_CHID 512 |
313 | typedef union { | 362 | typedef union { |
314 | struct { | 363 | struct { |
@@ -1023,12 +1072,12 @@ VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length); | |||
1023 | struct runlist_iter { | 1072 | struct runlist_iter { |
1024 | // Pointer to either a TSG or channel entry (they're the same size) | 1073 | // Pointer to either a TSG or channel entry (they're the same size) |
1025 | void *curr_entry; | 1074 | void *curr_entry; |
1026 | // This should be set to tsg_length when a TSG is reached, and | 1075 | // This should be set to tsg_length + 1 when a TSG is reached, and |
1027 | // decremented as each subsequent channel is printed. This allows us to | 1076 | // decremented each time _next() is called. This allows us to |
1028 | // track which channel are and are not part of the TSG. | 1077 | // track which channels are and are not part of the TSG. |
1029 | int channels_left_in_tsg; | 1078 | int entries_left_in_tsg; |
1030 | // Total runlist length, etc | 1079 | // Number of entries in runlist |
1031 | runlist_info_t rl_info; | 1080 | int len; |
1032 | }; | 1081 | }; |
1033 | 1082 | ||
1034 | #define NVDEBUG_MAX_DEVICES 8 | 1083 | #define NVDEBUG_MAX_DEVICES 8 |
@@ -1037,6 +1086,7 @@ extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | |||
1037 | // Defined in runlist.c | 1086 | // Defined in runlist.c |
1038 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); | 1087 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); |
1039 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); | 1088 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
1089 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | ||
1040 | 1090 | ||
1041 | // Defined in mmu.c | 1091 | // Defined in mmu.c |
1042 | uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); | 1092 | uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); |