aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2024-09-19 12:50:02 -0400
committerJoshua Bakita <bakitajoshua@gmail.com>2024-09-19 13:59:56 -0400
commit48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch)
treed63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc /nvdebug.h
parentac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff)
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to `echo 1 > /proc/gpuX/runlist0/switch_to_tsg` to switch to TSG 1 on runlist 0 on GPU X for pre-Ampere GPUs (for example).** Feature changes: - switch_to_tsg only makes sense on a per-runlist level. Before, this always operated on runlist0; this commit allows operating on any runlist by moving the API to the per-runlist paths. - On Ampere+, channel and TSG IDs are per-runlist, and no longer GPU-global. Consequently, the disable/enable_channel and preempt_tsg APIs have been moved from GPU-global to per-runlist paths on Ampere+. Bug fixes: - `preempt_runlist()` is now supported on Maxwell and Pascal. - `resubmit_runlist()` detects too-old GPUs. - MAX_CHID corrected from 512 to 511 and documented. - switch_to_tsg now includes a runlist resubmit, which appears to be necessary on Turing+ GPUs. Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB), GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100 (Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h37
1 files changed, 31 insertions, 6 deletions
diff --git a/nvdebug.h b/nvdebug.h
index 26689d9..26167a7 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -172,7 +172,8 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1};
172 IS_PENDING : Is a context switch pending? (read-only) 172 IS_PENDING : Is a context switch pending? (read-only)
173 TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG 173 TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG
174 174
175 Support: Kepler, Maxwell, Pascal, Volta, Turing 175 Support: Fermi*, Kepler, Maxwell, Pascal, Volta, Turing
176 *Fermi only supports PREEMPT_TYPE_CHANNEL.
176*/ 177*/
177#define NV_PFIFO_PREEMPT 0x00002634 178#define NV_PFIFO_PREEMPT 0x00002634
178typedef union { 179typedef union {
@@ -187,10 +188,26 @@ typedef union {
187 uint32_t raw; 188 uint32_t raw;
188} pfifo_preempt_t; 189} pfifo_preempt_t;
189 190
191/* Preempt a TSG or Runlist by ID
192 Similar to the register on older GPUs (see above), but located at an offset in Runlist RAM.
193 This means that there's one instance of this register for each runlist.
194
195 IS_PENDING is now IS_TSG_PREEMPT_PENDING and IS_RUNLIST_PREEMPT_PENDING was
196 added in the following bit (bit 22). As these fields are unused in nvdebug,
197 we use the old structure for simplicity.
198
199 TYPE is now better described as IS_TSG_PREEMPT. TYPE == 0 requests a preempt
200 of the runlist (rather than a channel preemption, as on older GPUs).
201
202 Support: Ampere, Hopper, Ada, [newer untested]
203*/
204#define NV_RUNLIST_PREEMPT_GA100 0x098
205#define PREEMPT_TYPE_RUNLIST 0
206
190/* 207/*
191 "Initiate a preempt of the engine by writing the bit associated with its 208 "Initiate a preempt of the engine by writing the bit associated with its
192 runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT 209 runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT
193 for the preempt to complete." 210 for the preempt to complete." (open-gpu-doc)
194 211
195 Useful for preempting multiple runlists at once. 212 Useful for preempting multiple runlists at once.
196 213
@@ -204,7 +221,10 @@ typedef union {
204 rl_preempt.raw |= BIT(nr); 221 rl_preempt.raw |= BIT(nr);
205 nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); 222 nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
206 223
207 Support: Volta, Turing 224 Support: Maxwell, Pascal, Volta, Turing
225
226 This register was deleted starting with Ampere, with functionality subsumed by
227 the NV_RUNLIST_PREEMPT register.
208*/ 228*/
209#define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 229#define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
210typedef union { 230typedef union {
@@ -285,7 +305,7 @@ typedef union {
285 TARGET : Aperture of runlist (video or system memory) 305 TARGET : Aperture of runlist (video or system memory)
286 306
287 Support: Fermi*, Kepler, Maxwell, Pascal, Volta 307 Support: Fermi*, Kepler, Maxwell, Pascal, Volta
288 *Fermi may expose this information 8 bytes earlier, starting at 0x227C? 308 *Fermi may expose ENG_RUNLIST_* 8 bytes earlier, starting at 0x227C?
289*/ 309*/
290#define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only 310#define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only
291#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only 311#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only
@@ -428,7 +448,11 @@ typedef union {
428 See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc 448 See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc
429*/ 449*/
430#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) 450#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
431#define MAX_CHID 512 451// Maximum valid channel index in the PCCSR region
452// Channel IDs start at 0, and there are 4096 bytes of 8-byte CCSR entries (per
453// NV_PCCSR_CHANNEL_INST__SIZE_1 in at least Volta and Turing), yielding a total
454// of 512 channel IDs, with a maximum ID of 511.
455#define MAX_CHID 511
432typedef union { 456typedef union {
433 struct { 457 struct {
434// 0:31 458// 0:31
@@ -554,6 +578,7 @@ typedef union {
554#define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 578#define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060
555#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU 579#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU
556 580
581#define NV_CHIP_ID_FERMI 0x0C0
557#define NV_CHIP_ID_KEPLER 0x0E0 582#define NV_CHIP_ID_KEPLER 0x0E0
558#define NV_CHIP_ID_MAXWELL 0x120 583#define NV_CHIP_ID_MAXWELL 0x120
559#define NV_CHIP_ID_PASCAL 0x130 584#define NV_CHIP_ID_PASCAL 0x130
@@ -1505,7 +1530,7 @@ int get_runlist_iter(
1505 struct nvdebug_state *g, 1530 struct nvdebug_state *g,
1506 int rl_id, 1531 int rl_id,
1507 struct runlist_iter *rl_iter /* out */); 1532 struct runlist_iter *rl_iter /* out */);
1508int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); 1533int preempt_tsg(struct nvdebug_state *g, uint32_t rl_id, uint32_t tsg_id);
1509int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); 1534int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id);
1510int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); 1535int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id);
1511 1536