author    Joshua Bakita <jbakita@cs.unc.edu>  2024-04-08 15:35:54 -0400
committer Joshua Bakita <jbakita@cs.unc.edu>  2024-04-08 17:09:13 -0400
commit    47506870790989b5e2d9a6128711d96c487f0d7b (patch)
tree      98c09b9464af4c4a983f75b17568aa5ca919d886
parent    14cb76b1a7e93a5f3900ea7696071dcc281a3586 (diff)
Heavily refactor runlist code for correctness and Turing support
- Support differently-formatted runlist registers on Turing
- Support different runlist register offsets on Turing
- Fix incorrect indenting when printing the runlist
- Fix `preempt_tsg` and `switch_to_tsg` API implementations to correctly
  interface with the hardware (previously, they would try to disable
  scheduling for the last-updated runlist pointer, which was nonsense, and
  just an artifact of my early misunderstandings of how the
  NV_PFIFO_RUNLIST* registers worked).
- Remove misused NV_PFIFO_RUNLIST and NV_PFIFO_RUNLIST_BASE registers
- Refactor `runlist.c` to use the APIs from `bus.c`
-rw-r--r--  nvdebug.h        | 116
-rw-r--r--  runlist.c        | 212
-rw-r--r--  runlist_procfs.c |  52
3 files changed, 192 insertions(+), 188 deletions(-)
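
Note: the `preempt_tsg`/`switch_to_tsg` fix above hinges on preemption being two distinct hardware mechanisms, not one. A minimal sketch of the corrected interface (not code from the patch itself), assuming the pfifo_preempt_t/runlist_preempt_t unions and the nvdebug_writel() helper declared in nvdebug.h as of this commit:

	// Sketch: the two preemption mechanisms this patch disentangles.
	#include "nvdebug.h"

	// Preempt one TSG: write its ID to NV_PFIFO_PREEMPT (Kepler+).
	static void sketch_preempt_one_tsg(struct nvdebug_state *g, uint32_t tsg_id)
	{
		pfifo_preempt_t req = { .raw = 0 };
		req.id = tsg_id;
		req.type = PREEMPT_TYPE_TSG;
		nvdebug_writel(g, NV_PFIFO_PREEMPT, req.raw);
	}

	// Preempt a whole runlist: set bit rl_id in NV_PFIFO_RUNLIST_PREEMPT
	// (Volta+). This is the operation the old code approximated by toggling
	// NV_PFIFO_SCHED_DISABLE with an ID read from the wrong register.
	static void sketch_preempt_whole_runlist(struct nvdebug_state *g, uint32_t rl_id)
	{
		runlist_preempt_t req = { .raw = BIT(rl_id) };
		nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, req.raw);
	}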
diff --git a/nvdebug.h b/nvdebug.h
index 2fc8c63..f65b403 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -131,8 +131,8 @@ struct gm107_runlist_chan {
  GPU instance addresses with Volta.
 */
 
-// Support: Volta, Ampere*, Turing*
-// *These treat the top 8 bits of TSGID as GFID (unused)
+// Support: Volta, Turing*, Ampere*
+// *These treat bits 4:11 (8 bits) as GFID (unused)
 struct gv100_runlist_tsg {
 // 0:63
 	enum ENTRY_TYPE entry_type:1;
@@ -166,7 +166,7 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1};
 
 /* Preempt a TSG or Channel by ID
   ID/CHID : Id of TSG or channel to preempt
-  IS_PENDING : Is a context switch pending?
+  IS_PENDING : Is a context switch pending? (read-only)
   TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG
 
   Support: Kepler, Maxwell, Pascal, Volta, Turing
@@ -201,7 +201,7 @@ typedef union {
 	rl_preempt.raw |= BIT(nr);
 	nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
 
-  Support: Volta
+  Support: Volta, Turing
 */
 #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
 typedef union {
@@ -255,39 +255,83 @@ typedef union {
  * cause a system to hang/stop responding."
  */
 
-// Note: This is different with Turing
-// Support: Fermi, Kepler, Maxwell, Pascal, Volta
-#define NV_PFIFO_RUNLIST_BASE 0x00002270
-#define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8)
+/* Runlist Metadata (up through Volta)
+  "Software specifies the GPU contexts that hardware should "run" by writing a
+  list of entries (known as a "runlist") to a 4k-aligned area of memory (beginning
+  at NV_PFIFO_RUNLIST_BASE), and by notifying Host that a new list is available
+  (by writing to NV_PFIFO_RUNLIST).
+
+  Submission of a new runlist causes Host to expire the timeslice of all work
+  scheduled by the previous runlist, allowing it to schedule the channels present
+  in the new runlist once they are fetched. SW can check the status of the runlist
+  by polling NV_PFIFO_ENG_RUNLIST_PENDING. (see dev_fifo.ref NV_PFIFO_RUNLIST for
+  a full description of the runlist submit mechanism).
+
+  Runlists can be stored in system memory or video memory (as specified by
+  NV_PFIFO_RUNLIST_BASE_TARGET). If a runlist is stored in video memory, software
+  will have to execute flush or read the last entry written before submitting the
+  runlist to Host to guarantee coherency." (volta/dev_ram.ref.txt)
+
+  We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where
+  i is a runlist index). Runlists are configured via the separate, writable
+  *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that.
+
+  LEN : Number of entries in runlist
+  IS_PENDING : Is runlist committed?
+  PTR : Pointer to start of 4k-aligned runlist (upper 28 of 40 bits)
+  TARGET : Aperture of runlist (video or system memory)
+
+  Support: Fermi*, Kepler, Maxwell, Pascal, Volta
+  *Fermi may expose this information 8 bytes earlier, starting at 0x227C?
+*/
+#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only
 typedef union {
 	struct {
+		// NV_PFIFO_ENG_RUNLIST_BASE_* fields
 		uint32_t ptr:28;
 		enum INST_TARGET target:2;
-		uint32_t padding:2;
+		uint32_t padding1:2;
+		// NV_PFIFO_ENG_RUNLIST_* fields
+		uint16_t len:16;
+		uint32_t padding2:4;
+		bool is_pending:1;
+		uint32_t padding3:11;
 	} __attribute__((packed));
-	uint32_t raw;
-} runlist_base_t;
+	uint64_t raw;
+} eng_runlist_gf100_t;
 
-// Support: Kepler, Maxwell, Pascal, Volta
-// Works on Fermi, but id is one bit longer and is b11111
-#define NV_PFIFO_RUNLIST 0x00002274
-#define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8)
+/*
+  Starting with Turing, the separate registers for reading and writing runlist
+  configuration were dropped in favor of read/write indexed registers. As part
+  of this, the layout was modified to allow for larger runlist pointers (upper
+  52 of 64 bits).
+
+  Support: Turing, Ampere, Lovelace?, Hopper?
+*/
+// Support: Turing
+#define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write
+#define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write
 typedef union {
-	// RUNLIST fields
 	struct {
-		uint32_t len:16;
-		uint32_t padding:4;
-		uint32_t id:4; // Runlist ID (each engine may have a seperate runlist)
-		uint32_t padding2:8;
+		enum INST_TARGET target:2;
+		uint32_t padding:10;
+		uint64_t ptr:28;
+		uint32_t padding2:24;
 	} __attribute__((packed));
-	// ENG_RUNLIST fields that differ
+	uint64_t raw;
+} runlist_base_tu102_t;
+
+typedef union {
 	struct {
-		uint32_t padding3:20;
-		bool is_pending:1; // Is runlist not yet committed?
-		uint32_t padding4:11;
+		uint16_t len:16;
+		uint16_t offset:16;
+		uint32_t preempted_tsgid:14;
+		bool valid_preempted_tsgid:1;
+		bool is_pending:1;
+		uint32_t preempted_offset:16;
 	} __attribute__((packed));
-	uint32_t raw;
-} runlist_info_t;
+	uint64_t raw;
+} runlist_submit_tu102_t;
 
 enum CHANNEL_STATUS {
 	CHANNEL_STATUS_IDLE = 0,
@@ -307,8 +351,13 @@ enum CHANNEL_STATUS {
 	CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14,
 };
 
+/* Programmable Channel Control System RAM (PCCSR)
+
+  512-entry array of channel control and status data structures.
+
+  Support: Fermi, Maxwell, Pascal, Volta, Turing, [more?]
+*/
 #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
-// There are a total of 512 possible channels
 #define MAX_CHID 512
 typedef union {
 	struct {
@@ -1023,12 +1072,12 @@ VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length);
 struct runlist_iter {
 	// Pointer to either a TSG or channel entry (they're the same size)
 	void *curr_entry;
-	// This should be set to tsg_length when a TSG is reached, and
-	// decremented as each subsequent channel is printed. This allows us to
-	// track which channel are and are not part of the TSG.
-	int channels_left_in_tsg;
-	// Total runlist length, etc
-	runlist_info_t rl_info;
+	// This should be set to tsg_length + 1 when a TSG is reached, and
+	// decremented each time _next() is called. This allows us to
+	// track which channels are and are not part of the TSG.
+	int entries_left_in_tsg;
+	// Number of entries in runlist
+	int len;
 };
 
 #define NVDEBUG_MAX_DEVICES 8
@@ -1037,6 +1086,7 @@ extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
 // Defined in runlist.c
 int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter);
 int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id);
+int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id);
 
 // Defined in mmu.c
 uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr);
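
Since the bitfield unions above carry the whole Turing layout change, here is a standalone userspace sketch of how the TU102 BASE/SUBMIT pair decodes. The raw values (0x12345000, 0x7) and the INST_TARGET enumerators are illustrative only, and the demo assumes GCC-style LSB-first bitfield allocation on a little-endian machine, as the driver does:

	// Demo of the runlist_base_tu102_t/runlist_submit_tu102_t decoding.
	// Compile: gcc -o rl_decode rl_decode.c && ./rl_decode
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	enum INST_TARGET { TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2 }; // assumed values

	typedef union {
		struct {
			enum INST_TARGET target:2;
			uint32_t padding:10;
			uint64_t ptr:28;
			uint32_t padding2:24;
		} __attribute__((packed));
		uint64_t raw;
	} runlist_base_tu102_t;

	typedef union {
		struct {
			uint16_t len:16;
			uint16_t offset:16;
			uint32_t preempted_tsgid:14;
			bool valid_preempted_tsgid:1;
			bool is_pending:1;
			uint32_t preempted_offset:16;
		} __attribute__((packed));
		uint64_t raw;
	} runlist_submit_tu102_t;

	int main(void)
	{
		// Stand-ins for nvdebug_readq() of NV_PFIFO_RUNLIST_BASE_TU102(0)
		// and NV_PFIFO_RUNLIST_SUBMIT_TU102(0); values are made up
		runlist_base_tu102_t base = { .raw = 0x12345000ULL };    // ptr=0x12345, VID_MEM
		runlist_submit_tu102_t submit = { .raw = 0x7ULL };       // len=7

		// As in get_runlist_iter(): the runlist IOVA is ptr << 12 (4k-aligned)
		printf("runlist @ %#llx, %u entries, target %d\n",
		       (unsigned long long)base.ptr << 12,
		       (unsigned)submit.len, (int)base.target);
		return 0;
	}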
diff --git a/runlist.c b/runlist.c
index ed35c7e..c725e77 100644
--- a/runlist.c
+++ b/runlist.c
@@ -1,172 +1,134 @@
-#include <linux/kernel.h> // Kernel types
+/* Copyright 2024 Joshua Bakita
+ * Helpers for dealing with the runlist and other Host (PFIFO) registers
+ */
+#include <linux/printk.h> // For printk()
+#include <asm/errno.h> // For error defines
+#include <asm/io.h> // For phys_to_virt()
 
 #include "nvdebug.h"
 
+// Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer
+// **If enabled, PRAMIN may not be otherwise used while walking the runlist!**
 #define FALLBACK_TO_PRAMIN
 
 /* Get runlist head and info (incl. length)
-  @param rl_iter Location at which to store output
-  @param rl_id Which runlist to obtain?
+  @param rl_id Which runlist to obtain?
+  @param rl_iter Location at which to store output
+  @return 0 or -errno on error
 */
 int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) {
-	runlist_base_t rl_base;
-	runlist_info_t rl_info;
-	u64 runlist_iova;
+	uint64_t runlist_iova;
+	enum INST_TARGET runlist_target;
+	uint16_t runlist_len;
+#ifdef FALLBACK_TO_PRAMIN
+	int off;
+#endif // FALLBACK_TO_PRAMIN
+	// Zero-initialize the runlist iterator
 	*rl_iter = (struct runlist_iter){0};
-	rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id));
-	// Check that reads are working
-	if (rl_base.raw == -1)
-		return -EIO;
-	// The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be:
-	// - A GPU address (type is sysmem_coherent)
-	// - A physical address (dereferencing after ioremap crashes)
-	// - A kernel virtual address (dereferencing segfaults)
-	// So maybe it's some sort of custom thing? This is an address that the GPU
-	// can use, so it would make most sense for it to be a physical address.
-	//
-	// BUT, it can't possibly be a physical address, as it would refer to an
-	// address greater than the maximum one on our system (by a lot!).
-	// Maybe I'm reading the runlist base wrong?
-	// Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
-	// address! So, what's this I/O address space? All I know is that it's what
-	// nvgpu_mem_get_addr() returns. That function returns the result of either:
-	// - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?)
-	// converts an IPA to a PA?
-	// - nvgpu_mem_iommu_translate
-	//
-	// The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
-	// returns SYSMEM.
-	//
-	// To convert a physical address to a IOMMU address, we add a bit
-	//
-	// BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
-	// before because the GPU had simply gone to sleep and invalidated its
-	// register state, so nvgpu_readl() was simply returning garbage.
-	rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id));
-	if (rl_info.raw == -1)
-		return -EIO;
-	runlist_iova = ((u64)rl_base.ptr) << 12;
-	printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n",
-		rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw);
-	printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id);
+
+	// Get runlist location and length using architecture-dependent logic
+	if (g->chip_id < NV_CHIP_ID_TURING) {
+		eng_runlist_gf100_t rl;
+		if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1)
+			return -EIO;
+		runlist_iova = ((uint64_t)rl.ptr) << 12;
+		runlist_target = rl.target;
+		printk(KERN_INFO "[nvdebug] Runlist %d: %d entries @ %llx in %s (config raw: %#018llx)\n",
+		       rl_id, rl.len, runlist_iova, target_to_text(rl.target), rl.raw);
+		runlist_len = rl.len;
+	} else if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		runlist_base_tu102_t base;
+		runlist_submit_tu102_t submit;
+		if ((base.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_BASE_TU102(rl_id))) == -1)
+			return -EIO;
+		if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1)
+			return -EIO;
+		runlist_iova = ((uint64_t)base.ptr) << 12;
+		runlist_target = base.target;
+		runlist_len = submit.len;
+	}
 	// Return early on an empty runlist
-	if (!rl_info.len)
+	if (!runlist_len)
 		return 0;
+
 	// If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping
-	if (rl_base.target == TARGET_VID_MEM) {
-		printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental.");
-		bar_config_block_t bar1_block, bar2_block;
-		bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK);
-		printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n", ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target), bar1_block.is_virtual ? "virtual" : "physical");
-		bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK);
-		printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar1_block.is_virtual ? "virtual" : "physical");
-		uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12);
-		if (!bar_inst_pramin_offset) {
-			printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
-			goto attempt_pramin_access;
-		}
-		/* TODO: Support BAR1?
-		bar_inst_pramin_offset = vram2PRAMIN(g, bar1_block.ptr << 12);
-		if (!bar_inst_pramin_offset) {
-			printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR1 in the current NV_PRAMIN window. VRAM inaccessible.\n");
-			return -EOPNOTSUPP;
-		}*/
-		// Instance blocks (size == 1kb) contain many things, but we only care about
-		// the section which describes the location of the page directory (page table)
-		uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF;
-		page_dir_config_t pd_config;
-		pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN);
-		uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi;
-		bar_pdb_vram_addr <<= 20;
-		bar_pdb_vram_addr |= pd_config.page_dir_lo;
-		bar_pdb_vram_addr <<= 12;
-		printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n", bar_pdb_vram_addr, target_to_text(pd_config.target), pd_config.is_ver2 ? "2" : "1", pd_config.raw);
-		// TODO: SYSMEM support for page table location
-		if (pd_config.target != TARGET_VID_MEM) {
-			printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n");
-			goto attempt_pramin_access;
-		}
-		uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr);
-		if (!bar_pdb_pramin_offset) {
-			printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
-			goto attempt_pramin_access;
-		}
+	if (runlist_target == TARGET_VID_MEM) {
+		void __iomem *bar2_page_dir;
+		bool pdb_is_ver2;
 		uint64_t runlist_bar_vaddr;
-		if (pd_config.is_ver2)
-			runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
+
+		if (get_bar2_pdb(g, &bar2_page_dir, &pdb_is_ver2) < 0)
+			return -EIO;
+
+		if (pdb_is_ver2)
+			runlist_bar_vaddr = search_page_directory(g, bar2_page_dir, phy2PRAMIN, runlist_iova);
 		else
-			runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
+			runlist_bar_vaddr = search_v1_page_directory(g, bar2_page_dir, phy2PRAMIN, runlist_iova);
 		if (!runlist_bar_vaddr) {
 			printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n");
 			goto attempt_pramin_access;
 		}
 		printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr);
-		/* XXX: Old test code
-		uint32_t bar2_pd_pramin_offset = vram_to_pramin_off(bar2_pd);
-		//walk_pd_subtree(bar2_pd_pramin_offset);
-		uint64_t runlist_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, runlist_iova);
-		page_dir_entry_t pde_0;
-		pde_0.raw = nvdebug_readl(g, NV_PRAMIN + bar2_pd_pramin_offset);
-		uint32_t pde_1 = nvdebug_readl(g, NV_PRAMIN + vram_to_pramin_off(((u64)pde_0.addr) << 12));
-		uint64_t pde_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, ((u64)pde_0.addr) << 12);
-		uint32_t pde_2 = readl(g->bar3 + pde_bar2_vaddr);
-		printk(KERN_INFO "[nvdebug] PDE0 via PRAMIN: %x, via BAR3: %x\n", pde_1, pde_2);
-		*/
-		if (!g->bar3) {
+		if (!g->bar2) {
 			printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n");
 			return -ENODEV;
 		}
 		rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr;
 	} else {
 		// Directly access the runlist if stored in SYS_MEM (physically addressed)
-		rl_iter->curr_entry = phys_to_virt(runlist_iova);
+		// XXX: SYS_MEM is an IOMMU address on some platforms, causing this to crash
+		rl_iter->curr_entry = (void*)phys_to_virt(runlist_iova);
 	}
-	rl_iter->rl_info = rl_info;
+	rl_iter->len = runlist_len;
 	return 0;
+
 attempt_pramin_access:
 #ifdef FALLBACK_TO_PRAMIN
 	printk(KERN_INFO "[nvdebug] Attempting to move PRAMIN window to runlist as BAR2/3-based access failed [DANGEROUS SIDE EFFECTS]!\n");
-	bar0_window_t win;
-	win.base = (runlist_iova >> 16);
-	win.target = TARGET_VID_MEM;
-	// Shift PRAMIN window. This will cause problems if it races with driver code
-	// that tries to do the same, or expects the window not to move.
-	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, win.raw);
-	uint32_t off = vram2PRAMIN(g, runlist_iova);
-	// Workaround bug for if `off` should be zero (vram2PRAMIN normally returns
-	// this on error)
-	if (!off && (runlist_iova & 0xffffull != runlist_iova)) {
-		printk(KERN_INFO "[nvdebug] Unable to shift PRAMIN to runlist. Aborting...\n");
-		return -EOPNOTSUPP;
-	}
+	if ((off = addr_to_pramin_mut(g, runlist_iova, runlist_target)) == -1)
+		return off;
 	rl_iter->curr_entry = g->regs + NV_PRAMIN + off;
-	rl_iter->rl_info = rl_info;
+	rl_iter->len = runlist_len;
 	return 0;
 #else
 	return -EOPNOTSUPP;
 #endif // FALLBACK_TO_PRAMIN
 }
 
+/* Trigger a preempt of the specified TSG
+  @param tsg_id ID of TSG to preempt.
+  @return 0 or -errno on error
+
+  Note: If no other TSGs exist in the associated runlist, this TSG may
+  continue executing, unless NV_PFIFO_SCHED_DISABLE is set, or all the
+  channels of the TSG to be preempted are disabled.
+*/
 int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) {
-	runlist_info_t rl_info;
 	pfifo_preempt_t pfifo_preempt;
-	runlist_disable_t rl_disable;
-	if (!g)
-		return -EIO;
-	rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
+	if (g->chip_id < NV_CHIP_ID_KEPLER)
+		return -EOPNOTSUPP;
+
+	pfifo_preempt.raw = 0;
 	pfifo_preempt.id = tsg_id;
 	pfifo_preempt.is_pending = 0;
 	pfifo_preempt.type = PREEMPT_TYPE_TSG;
-	// There may be a bug (?) that requires us to disable scheduling before preempting
-	rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
-	rl_disable.raw |= BIT(rl_info.id); // Disable runlist rl_info.id
-	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+
 	// Actually trigger the preemption
 	nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw);
-	// Renable scheduling
-	rl_disable.raw &= ~BIT(rl_info.id); // Enable runlist rl_info.id
-	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
-
-	printk(KERN_INFO "[nvdebug] TSG %d preempted (runlist %d)\n", tsg_id, rl_info.id);
+	return 0;
+}
+
+/* Trigger a preempt of the specified runlist
+  @param rl_id ID of runlist to preempt.
+  @return 0 or -errno on error
+*/
+int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) {
+	runlist_preempt_t rl_preempt;
+	if (g->chip_id < NV_CHIP_ID_VOLTA)
+		return -EOPNOTSUPP;
+
+	// Overwrite, as the register contains nothing to preserve
+	rl_preempt.raw = BIT(rl_id);
+	nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
 	return 0;
 }
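
For context on how these two exports compose, here is a hedged sketch (not part of the patch) of a caller that walks runlist 0 and then preempts it, using only functions visible in this commit: get_runlist_iter() and preempt_runlist() from this file, plus the versioned entry accessors (entry_type(), NV_RL_ENTRY_SIZE()) that runlist_procfs.c relies on below:

	#include <linux/printk.h>
	#include "nvdebug.h"

	// Sketch: count TSG entries in runlist 0, then preempt the whole runlist.
	static int sketch_preempt_and_count(struct nvdebug_state *g)
	{
		struct runlist_iter it;
		int i, num_tsgs = 0, err;

		if ((err = get_runlist_iter(g, 0 /* rl_id */, &it)))
			return err;
		// it.curr_entry points at the first of it.len fixed-size entries
		for (i = 0; i < it.len; i++, it.curr_entry += NV_RL_ENTRY_SIZE(g))
			if (entry_type(g, it.curr_entry) == ENTRY_TYPE_TSG)
				num_tsgs++;
		printk(KERN_INFO "[nvdebug] Runlist 0: %d TSGs in %d entries\n",
		       num_tsgs, it.len);
		// On Volta+ this expires the timeslice of everything on runlist 0
		return preempt_runlist(g, 0);
	}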
diff --git a/runlist_procfs.c b/runlist_procfs.c
index f7f937d..7dedee3 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -69,12 +69,12 @@ static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) {
 		if (err)
 			return ERR_PTR(err);
 		// Don't try to print an empty runlist
-		if (rl_iter.rl_info.len <= 0)
+		if (rl_iter.len <= 0)
 			return NULL;
 		return &rl_iter;
 	}
 	// If we're resuming an earlier print
-	if (*pos < rl_iter.rl_info.len) {
+	if (*pos < rl_iter.len) {
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
 		// There's a nasty bug prior to 4.19-rc1 that if the buffer overflows, the
 		// last update to `pos` is not saved. Work around that here by reloading a
@@ -98,14 +98,16 @@ static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter,
 	(*pos)++;
 	rl_iter->curr_entry += NV_RL_ENTRY_SIZE(g);
 	// Verify we haven't reached the end of the runlist
-	// rl_info.len is the num of tsg entries + total num of channel entries
-	if (*pos < rl_iter->rl_info.len) {
+	// len is the num of tsg entries + total num of channel entries
+	if (*pos < rl_iter->len) {
 		ret = rl_iter;
 	}
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
 	// Bug workaround. See comment in runlist_file_seq_start()
 	pos_fixup = ret ? *pos : 0;
 #endif
+	if (rl_iter->entries_left_in_tsg)
+		rl_iter->entries_left_in_tsg--;
 	return ret;
 }
 
@@ -113,17 +115,19 @@ static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) {
 	// No cleanup needed
 }
 
+// _show() must be idempotent. This function will be rerun if the seq_printf
+// buffer was too small.
 static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
 	struct runlist_iter *rl_iter = raw_rl_iter;
 	void *entry = rl_iter->curr_entry;
 	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)];
 	if (entry_type(g, entry) == ENTRY_TYPE_TSG) {
-		if (rl_iter->channels_left_in_tsg) {
-			printk(KERN_WARNING "[nvdebug] Found TSG ID%d @ %px when %d channels were still expected under the previous TSG in the runlist!\n", tsgid(g, entry), entry, rl_iter->channels_left_in_tsg);
-			while (rl_iter->channels_left_in_tsg--)
+		if (rl_iter->entries_left_in_tsg) {
+			printk(KERN_WARNING "[nvdebug] Found TSG ID%d @ %px when %d channels were still expected under the previous TSG in the runlist!\n", tsgid(g, entry), entry, rl_iter->entries_left_in_tsg);
+			while (rl_iter->entries_left_in_tsg--)
 				seq_printf(s, "[missing channel]\n");
 		}
-		rl_iter->channels_left_in_tsg = tsg_length(g, entry);
+		rl_iter->entries_left_in_tsg = tsg_length(g, entry) + 1;
 		seq_printf(s, "+---- TSG Entry %-3d---+\n", tsgid(g, entry));
 		seq_printf(s, "| Scale: %-13d|\n", timeslice_scale(g, entry));
 		seq_printf(s, "| Timeout: %-11d|\n", timeslice_timeout(g, entry));
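
The two comment lines added above are the contract that motivated moving the entries_left_in_tsg decrement out of _show() and into _next(): seq_read() may discard a half-printed record and re-invoke _show() on it with a larger buffer, so _show() must not mutate iterator state. A worked example of the resulting counter arithmetic (illustrative, in terms of the functions in this file):

	/* Worked example (illustrative): a TSG entry with N = 2 channels.
	 *
	 *   _show(TSG)    sets  entries_left_in_tsg = 2 + 1 = 3
	 *   _next()       then  3 -> 2
	 *   _show(chan 1) sees  2 != 0, so the channel is indented
	 *   _next()       then  2 -> 1
	 *   _show(chan 2) sees  1 != 0, so the channel is indented
	 *   _next()       then  1 -> 0
	 *   _show(next entry) sees 0: no indent, and a TSG here is consistent
	 *
	 * The +1 bias absorbs the _next() call that follows the TSG entry itself;
	 * without it, the last channel of every TSG would lose its indent, which
	 * is the indentation bug named in the commit message. And because _show()
	 * no longer writes the counter for channels, a rerun after a seq_printf
	 * overflow produces identical output.
	 */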
@@ -134,10 +138,8 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
 #ifndef DETAILED_CHANNEL_INFO
 		u64 instance_ptr = 0;
 #endif
-		if (rl_iter->channels_left_in_tsg) {
+		if (rl_iter->entries_left_in_tsg)
 			indt = " ";
-			rl_iter->channels_left_in_tsg--;
-		}
 #ifdef DETAILED_CHANNEL_INFO
 		runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
 #else
@@ -193,8 +195,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
 		return -ERANGE;
 
 	// Execute preemption
-	err = preempt_tsg(g, target_tsgid);
-	if (err)
+	if ((err = preempt_tsg(g, target_tsgid)))
 		return err;
 
 	return count;
@@ -210,8 +211,6 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
 	uint32_t target_channel;
 	channel_ctrl_t chan;
 	int err;
-	runlist_info_t rl_info;
-	runlist_disable_t rl_disable;
 	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
@@ -221,19 +220,12 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
 	if (target_channel > MAX_CHID)
 		return -ERANGE;
 
-	// Disable channel
-	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
+	// Read current configuration
+	if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+		return -EIO;
+	// Request disablement
 	chan.enable_clear = true;
-	// disable sched
-	rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
-	rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
-	rl_disable.raw |= BIT(rl_info.id);
-	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
-	// disable chan
 	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
-	// enable sched
-	rl_disable.raw &= ~BIT(rl_info.id);
-	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
 
 	return count;
 }
@@ -270,6 +262,7 @@ struct file_operations enable_channel_file_ops = {
 	.llseek = default_llseek,
 };
 
+// Note: Operates only on runlist 0 (Compute/Graphics)
 ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
 		size_t count, loff_t *off) {
 	uint32_t target_tsgid;
@@ -292,7 +285,7 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
 		return err;
 
 	// Iterate through all TSGs
-	while (pos < rl_iter.rl_info.len) {
+	while (pos < rl_iter.len) {
 		if (tsgid(g, rl_iter.curr_entry) == target_tsgid) {
 			// Enable channels of target TSG
 			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
@@ -313,9 +306,8 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
 		pos += 1 + tsg_length(g, rl_iter.curr_entry);
 		rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry);
 	}
-	// Switch to next TSG with active channels (should be our TSG)
-	err = preempt_tsg(g, target_tsgid);
-	if (err)
+	// Trigger a runlist-level preempt to switch to `target_tsgid`
+	if ((err = preempt_runlist(g, 0)))
 		return err;
 
 	return count;
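
Taken together, the corrected switch_to_tsg path reduces to the sketch below. It condenses the loop and helpers from this file (get_runlist_iter(), tsg_length(), next_tsg(), preempt_runlist()); the wrapper function itself and the elided channel enable/disable step are illustrative, not code from the patch:

	#include "nvdebug.h"

	// Sketch: make target_tsgid the only runnable TSG, then preempt.
	static int sketch_switch_to_tsg(struct nvdebug_state *g, uint32_t target_tsgid)
	{
		struct runlist_iter it;
		int pos = 0, err;

		if ((err = get_runlist_iter(g, 0 /* compute/graphics runlist */, &it)))
			return err;
		// 1. Walk TSG-by-TSG: enable the target TSG's channels and disable
		//    everyone else's (via NV_PCCSR_CHANNEL_INST, as done above with
		//    for_chan_in_tsg(); elided here)
		while (pos < it.len) {
			// ... enable/disable channels of the TSG at it.curr_entry ...
			pos += 1 + tsg_length(g, it.curr_entry);
			it.curr_entry = next_tsg(g, it.curr_entry);
		}
		// 2. Preempt the runlist; when hardware reschedules, only the
		//    target TSG still has runnable channels, so it runs next
		return preempt_runlist(g, 0);
	}

This is also why the old implementation was nonsense: preempting the target TSG by ID at this point (as preempt_tsg() did) would kick the very TSG being switched to off the engine, whereas a runlist-level preempt forces a rescheduling pass without singling anything out.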