diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 12:50:02 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 13:59:56 -0400 |
commit | 48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch) | |
tree | d63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc /nvdebug.h | |
parent | ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff) |
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to
`echo 1 > /proc/gpuX/runlist0/switch_to_tsg` to switch to TSG 1 on
runlist 0 on GPU X for pre-Ampere GPUs (for example).**
Feature changes:
- switch_to_tsg only makes sense on a per-runlist level. Before, this
always operated on runlist0; this commit allows operating on any
runlist by moving the API to the per-runlist paths.
- On Ampere+, channel and TSG IDs are per-runlist, and no longer
GPU-global. Consequently, the disable/enable_channel and
preempt_tsg APIs have been moved from GPU-global to per-runlist
paths on Ampere+.
Bug fixes:
- `preempt_runlist()` is now supported on Maxwell and Pascal.
- `resubmit_runlist()` detects too-old GPUs.
- MAX_CHID corrected from 512 to 511 and documented.
- switch_to_tsg now includes a runlist resubmit, which appears to be
necessary on Turing+ GPUs.
Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB),
GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100
(Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 37 |
1 files changed, 31 insertions, 6 deletions
@@ -172,7 +172,8 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | |||
172 | IS_PENDING : Is a context switch pending? (read-only) | 172 | IS_PENDING : Is a context switch pending? (read-only) |
173 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | 173 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG |
174 | 174 | ||
175 | Support: Kepler, Maxwell, Pascal, Volta, Turing | 175 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta, Turing |
176 | *Fermi only supports PREEMPT_TYPE_CHANNEL. | ||
176 | */ | 177 | */ |
177 | #define NV_PFIFO_PREEMPT 0x00002634 | 178 | #define NV_PFIFO_PREEMPT 0x00002634 |
178 | typedef union { | 179 | typedef union { |
@@ -187,10 +188,26 @@ typedef union { | |||
187 | uint32_t raw; | 188 | uint32_t raw; |
188 | } pfifo_preempt_t; | 189 | } pfifo_preempt_t; |
189 | 190 | ||
191 | /* Preempt a TSG or Runlist by ID | ||
192 | Similar to older GPUs (see above), but located at an offset in Runlist RAM. | |
193 | This means that there's one instance of this register for each runlist. | ||
194 | |||
195 | IS_PENDING is now IS_TSG_PREEMPT_PENDING and IS_RUNLIST_PREEMPT_PENDING was | ||
196 | added in the following bit (bit 22). As these fields are unused in nvdebug, | ||
197 | we use the old structure for simplicity. | ||
198 | |||
199 | TYPE is now better described as IS_TSG_PREEMPT. TYPE == 0 requests a preempt | ||
200 | of the runlist (rather than a channel preemption, as on older GPUs). | ||
201 | |||
202 | Support: Ampere, Hopper, Ada, [newer untested] | ||
203 | */ | ||
204 | #define NV_RUNLIST_PREEMPT_GA100 0x098 | ||
205 | #define PREEMPT_TYPE_RUNLIST 0 | ||
206 | |||
190 | /* | 207 | /* |
191 | "Initiate a preempt of the engine by writing the bit associated with its | 208 | "Initiate a preempt of the engine by writing the bit associated with its |
192 | runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT | 209 | runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT |
193 | for the preempt to complete." | 210 | for the preempt to complete." (open-gpu-doc) |
194 | 211 | ||
195 | Useful for preempting multiple runlists at once. | 212 | Useful for preempting multiple runlists at once. |
196 | 213 | ||
@@ -204,7 +221,10 @@ typedef union { | |||
204 | rl_preempt.raw |= BIT(nr); | 221 | rl_preempt.raw |= BIT(nr); |
205 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | 222 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); |
206 | 223 | ||
207 | Support: Volta, Turing | 224 | Support: Maxwell, Pascal, Volta, Turing |
225 | |||
226 | This register was deleted starting with Ampere, with functionality subsumed by | ||
227 | the NV_RUNLIST_PREEMPT register. | ||
208 | */ | 228 | */ |
209 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 | 229 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 |
210 | typedef union { | 230 | typedef union { |
@@ -285,7 +305,7 @@ typedef union { | |||
285 | TARGET : Aperture of runlist (video or system memory) | 305 | TARGET : Aperture of runlist (video or system memory) |
286 | 306 | ||
287 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta | 307 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta |
288 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? | 308 | *Fermi may expose ENG_RUNLIST_* 8 bytes earlier, starting at 0x227C? |
289 | */ | 309 | */ |
290 | #define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only | 310 | #define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only |
291 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only | 311 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only |
@@ -428,7 +448,11 @@ typedef union { | |||
428 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc | 448 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc |
429 | */ | 449 | */ |
430 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 450 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
431 | #define MAX_CHID 512 | 451 | // Maximum valid channel index in the PCCSR region |
452 | // Channel IDs start at 0, and there are 4096 bytes of 8-byte CCSR entries (per | ||
453 | // NV_PCCSR_CHANNEL_INST__SIZE_1 in at least Volta and Turing), yielding a total | ||
454 | // of 512 channel IDs, with a maximum ID of 511. | ||
455 | #define MAX_CHID 511 | ||
432 | typedef union { | 456 | typedef union { |
433 | struct { | 457 | struct { |
434 | // 0:31 | 458 | // 0:31 |
@@ -554,6 +578,7 @@ typedef union { | |||
554 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | 578 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 |
555 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | 579 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU |
556 | 580 | ||
581 | #define NV_CHIP_ID_FERMI 0x0C0 | ||
557 | #define NV_CHIP_ID_KEPLER 0x0E0 | 582 | #define NV_CHIP_ID_KEPLER 0x0E0 |
558 | #define NV_CHIP_ID_MAXWELL 0x120 | 583 | #define NV_CHIP_ID_MAXWELL 0x120 |
559 | #define NV_CHIP_ID_PASCAL 0x130 | 584 | #define NV_CHIP_ID_PASCAL 0x130 |
@@ -1505,7 +1530,7 @@ int get_runlist_iter( | |||
1505 | struct nvdebug_state *g, | 1530 | struct nvdebug_state *g, |
1506 | int rl_id, | 1531 | int rl_id, |
1507 | struct runlist_iter *rl_iter /* out */); | 1532 | struct runlist_iter *rl_iter /* out */); |
1508 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); | 1533 | int preempt_tsg(struct nvdebug_state *g, uint32_t rl_id, uint32_t tsg_id); |
1509 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1534 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); |
1510 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1535 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); |
1511 | 1536 | ||