From 47506870790989b5e2d9a6128711d96c487f0d7b Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Mon, 8 Apr 2024 15:35:54 -0400 Subject: Heavily refactor runlist code for correctness and Turing support - Support differently-formatted runlist registers on Turing - Support different runlist register offsets on Turing - Fix incorrect indenting when printing the runlist - Fix `preempt_tsg` and `switch_to_tsg` API implementations to correctly interface with the hardware (previously, they would try to disable scheduling for the last-updated runlist pointer, which was nonsense, and just an artifact of my early misunderstandings of how the NV_PFIFO_RUNLIST* registers worked). - Remove misused NV_PFIFO_RUNLIST and NV_PFIFO_RUNLIST_BASE registers - Refactor `runlist.c` to use the APIs from `bus.c` --- nvdebug.h | 116 +++++++++++++++++++++--------- runlist.c | 212 +++++++++++++++++++++++-------------------------------- runlist_procfs.c | 52 ++++++-------- 3 files changed, 192 insertions(+), 188 deletions(-) diff --git a/nvdebug.h b/nvdebug.h index 2fc8c63..f65b403 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -131,8 +131,8 @@ struct gm107_runlist_chan { GPU instance addresses with Volta. */ -// Support: Volta, Ampere*, Turing* -// *These treat the top 8 bits of TSGID as GFID (unused) +// Support: Volta, Turing*, Ampere* +// *These treat bits 4:11 (8 bits) as GFID (unused) struct gv100_runlist_tsg { // 0:63 enum ENTRY_TYPE entry_type:1; @@ -166,7 +166,7 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; /* Preempt a TSG or Channel by ID ID/CHID : Id of TSG or channel to preempt - IS_PENDING : Is a context switch pending? + IS_PENDING : Is a context switch pending? 
(read-only) TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG Support: Kepler, Maxwell, Pascal, Volta, Turing @@ -201,7 +201,7 @@ typedef union { rl_preempt.raw |= BIT(nr); nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); - Support: Volta + Support: Volta, Turing */ #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 typedef union { @@ -255,39 +255,83 @@ typedef union { * cause a system to hang/stop responding." */ -// Note: This is different with Turing -// Support: Fermi, Kepler, Maxwell, Pascal, Volta -#define NV_PFIFO_RUNLIST_BASE 0x00002270 -#define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8) +/* Runlist Metadata (up through Volta) + "Software specifies the GPU contexts that hardware should "run" by writing a + list of entries (known as a "runlist") to a 4k-aligned area of memory (beginning + at NV_PFIFO_RUNLIST_BASE), and by notifying Host that a new list is available + (by writing to NV_PFIFO_RUNLIST). + + Submission of a new runlist causes Host to expire the timeslice of all work + scheduled by the previous runlist, allowing it to schedule the channels present + in the new runlist once they are fetched. SW can check the status of the runlist + by polling NV_PFIFO_ENG_RUNLIST_PENDING. (see dev_fifo.ref NV_PFIFO_RUNLIST for + a full description of the runlist submit mechanism). + + Runlists can be stored in system memory or video memory (as specified by + NV_PFIFO_RUNLIST_BASE_TARGET). If a runlist is stored in video memory, software + will have to execute flush or read the last entry written before submitting the + runlist to Host to guarantee coherency." (volta/dev_ram.ref.txt) + + We only document the *_PFIFO_ENG_RUNLIST_*(i) read-only registers here (where + i is a runlist index). Runlists are configured via the separate, writable + *_PFIFO_RUNLIST_* register; see open-gpu-doc for more on that. + + LEN : Number of entries in runlist + IS_PENDING : Is runlist committed?
+ PTR : Pointer to start of 4k-aligned runlist (upper 28 of 40 bits) + TARGET : Aperture of runlist (video or system memory) + + Support: Fermi*, Kepler, Maxwell, Pascal, Volta + *Fermi may expose this information 8 bytes earlier, starting at 0x227C? +*/ +#define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only typedef union { struct { + // NV_PFIFO_ENG_RUNLIST_BASE_* fields uint32_t ptr:28; enum INST_TARGET target:2; - uint32_t padding:2; + uint32_t padding1:2; + // NV_PFIFO_ENG_RUNLIST_* fields + uint16_t len:16; + uint32_t padding2:4; + bool is_pending:1; + uint32_t padding3:11; } __attribute__((packed)); - uint32_t raw; -} runlist_base_t; + uint64_t raw; +} eng_runlist_gf100_t; -// Support: Kepler, Maxwell, Pascal, Volta -// Works on Fermi, but id is one bit longer and is b11111 -#define NV_PFIFO_RUNLIST 0x00002274 -#define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8) +/* + Starting with Turing, the separate registers for reading and writing runlist + configuration were dropped in favor of read/write indexed registers. As part + of this, the layout was modified to allow for larger runlist pointers (upper + 52 of 64 bits). + + Support: Turing, Ampere, Lovelace?, Hopper? +*/ +// Support: Turing +#define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write +#define NV_PFIFO_RUNLIST_SUBMIT_TU102(i) (0x00002B08+(i)*16) // Read/write typedef union { - // RUNLIST fields struct { - uint32_t len:16; - uint32_t padding:4; - uint32_t id:4; // Runlist ID (each engine may have a seperate runlist) - uint32_t padding2:8; + enum INST_TARGET target:2; + uint32_t padding:10; + uint64_t ptr:28; + uint32_t padding2:24; } __attribute__((packed)); - // ENG_RUNLIST fields that differ + uint64_t raw; +} runlist_base_tu102_t; + +typedef union { struct { - uint32_t padding3:20; - bool is_pending:1; // Is runlist not yet committed?
- uint32_t padding4:11; + uint16_t len:16; + uint16_t offset:16; + uint32_t preempted_tsgid:14; + bool valid_preempted_tsgid:1; + bool is_pending:1; + uint32_t preempted_offset:16; } __attribute__((packed)); - uint32_t raw; -} runlist_info_t; + uint64_t raw; +} runlist_submit_tu102_t; enum CHANNEL_STATUS { CHANNEL_STATUS_IDLE = 0, @@ -307,8 +351,13 @@ enum CHANNEL_STATUS { CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, }; +/* Programmable Channel Control System RAM (PCCSR) + + 512-entry array of channel control and status data structures. + + Support: Fermi, Maxwell, Pascal, Volta, Turing, [more?] +*/ #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) -// There are a total of 512 possible channels #define MAX_CHID 512 typedef union { struct { @@ -1023,12 +1072,12 @@ VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length); struct runlist_iter { // Pointer to either a TSG or channel entry (they're the same size) void *curr_entry; - // This should be set to tsg_length when a TSG is reached, and - // decremented as each subsequent channel is printed. This allows us to - // track which channel are and are not part of the TSG. - int channels_left_in_tsg; - // Total runlist length, etc - runlist_info_t rl_info; + // This should be set to tsg_length + 1 when a TSG is reached, and + // decremented each time _next() is called. This allows us to + // track which channels are and are not part of the TSG. 
+ int entries_left_in_tsg; + // Number of entries in runlist + int len; }; #define NVDEBUG_MAX_DEVICES 8 @@ -1037,6 +1086,7 @@ extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; // Defined in runlist.c int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); +int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); // Defined in mmu.c uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); diff --git a/runlist.c b/runlist.c index ed35c7e..c725e77 100644 --- a/runlist.c +++ b/runlist.c @@ -1,172 +1,134 @@ -#include // Kernel types +/* Copyright 2024 Joshua Bakita + * Helpers for dealing with the runlist and other Host (PFIFO) registers + */ +#include // For printk() +#include // For error defines +#include // For phys_to_virt() #include "nvdebug.h" +// Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer +// **If enabled, PRAMIN may not be otherwise used while walking the runlist!** #define FALLBACK_TO_PRAMIN /* Get runlist head and info (incl. length) - @param rl_iter Location at which to store output - @param rl_id Which runlist to obtain? + @param rl_id Which runlist to obtain? 
+ @param rl_iter Location at which to store output + @return 0 or -errno on error */ int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) { - runlist_base_t rl_base; - runlist_info_t rl_info; - u64 runlist_iova; + uint64_t runlist_iova; + enum INST_TARGET runlist_target; + uint16_t runlist_len; +#ifdef FALLBACK_TO_PRAMIN + int off; +#endif // FALLBACK_TO_PRAMIN + // Zero-initialize the runlist iterator *rl_iter = (struct runlist_iter){0}; - rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id)); - // Check that reads are working - if (rl_base.raw == -1) - return -EIO; - // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be: - // - A GPU address (type is sysmem_coherent) - // - A physical address (dereferencing after ioremap crashes) - // - A kernel virtual address (dereferencing segfaults) - // So maybe it's some sort of custom thing? This is an address that the GPU - // can use, so it would make most sense for it to be a physical address. - // - // BUT, it can't possibly be a physical address, as it would refer to an - // address greater than the maximum one on our system (by a lot!). - // Maybe I'm reading the runlist base wrong? - // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual - // address! So, what's this I/O address space? All I know is that it's what - // nvgpu_mem_get_addr() returns. That function returns the result of either: - // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) - // converts an IPA to a PA? - // - nvgpu_mem_iommu_translate - // - // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which - // returns SYSMEM. - // - // To convert a physical address to a IOMMU address, we add a bit - // - // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working - // before because the GPU had simply gone to sleep and invalidated its - // register state, so nvgpu_readl() was simply returning garbage. 
- rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id)); - if (rl_info.raw == -1) - return -EIO; - runlist_iova = ((u64)rl_base.ptr) << 12; - printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n", - rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw); - printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id); + + // Get runlist location and length using architecture-dependent logic + if (g->chip_id < NV_CHIP_ID_TURING) { + eng_runlist_gf100_t rl; + if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) + return -EIO; + runlist_iova = ((uint64_t)rl.ptr) << 12; + runlist_target = rl.target; + printk(KERN_INFO "[nvdebug] Runlist %d: %d entries @ %llx in %s (config raw: %#018llx)\n", + rl_id, rl.len, runlist_iova, target_to_text(rl.target), rl.raw); + runlist_len = rl.len; + } else if (g->chip_id < NV_CHIP_ID_AMPERE) { + runlist_base_tu102_t base; + runlist_submit_tu102_t submit; + if ((base.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_BASE_TU102(rl_id))) == -1) + return -EIO; + if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) + return -EIO; + runlist_iova = ((uint64_t)base.ptr) << 12; + runlist_target = base.target; + runlist_len = submit.len; + } // Return early on an empty runlist - if (!rl_info.len) + if (!runlist_len) return 0; + // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping - if (rl_base.target == TARGET_VID_MEM) { - printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental."); - bar_config_block_t bar1_block, bar2_block; - bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK); - printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n", ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target), bar1_block.is_virtual ? 
"virtual" : "physical"); - bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK); - printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar1_block.is_virtual ? "virtual" : "physical"); - uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12); - if (!bar_inst_pramin_offset) { - printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n"); - goto attempt_pramin_access; - } - /* TODO: Support BAR1? - bar_inst_pramin_offset = vram2PRAMIN(g, bar1_block.ptr << 12); - if (!bar_inst_pramin_offset) { - printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR1 in the current NV_PRAMIN window. VRAM inaccessible.\n"); - return -EOPNOTSUPP; - }*/ - // Instance blocks (size == 1kb) contain many things, but we only care about - // the section which describes the location of the page directory (page table) - uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF; - page_dir_config_t pd_config; - pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN); - uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi; - bar_pdb_vram_addr <<= 20; - bar_pdb_vram_addr |= pd_config.page_dir_lo; - bar_pdb_vram_addr <<= 12; - printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n", bar_pdb_vram_addr, target_to_text(pd_config.target), pd_config.is_ver2 ? "2" : "1", pd_config.raw); - // TODO: SYSMEM support for page table location - if (pd_config.target != TARGET_VID_MEM) { - printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n"); - goto attempt_pramin_access; - } - uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr); - if (!bar_pdb_pramin_offset) { - printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. 
VRAM inaccessible.\n"); - goto attempt_pramin_access; - } + if (runlist_target == TARGET_VID_MEM) { + void __iomem *bar2_page_dir; + bool pdb_is_ver2; uint64_t runlist_bar_vaddr; - if (pd_config.is_ver2) - runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova); + + if (get_bar2_pdb(g, &bar2_page_dir, &pdb_is_ver2) < 0) + return -EIO; + + if (pdb_is_ver2) + runlist_bar_vaddr = search_page_directory(g, bar2_page_dir, phy2PRAMIN, runlist_iova); else - runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova); + runlist_bar_vaddr = search_v1_page_directory(g, bar2_page_dir, phy2PRAMIN, runlist_iova); if (!runlist_bar_vaddr) { printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n"); goto attempt_pramin_access; } printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr); - /* XXX: Old test code - uint32_t bar2_pd_pramin_offset = vram_to_pramin_off(bar2_pd); - //walk_pd_subtree(bar2_pd_pramin_offset); - uint64_t runlist_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, runlist_iova); - page_dir_entry_t pde_0; - pde_0.raw = nvdebug_readl(g, NV_PRAMIN + bar2_pd_pramin_offset); - uint32_t pde_1 = nvdebug_readl(g, NV_PRAMIN + vram_to_pramin_off(((u64)pde_0.addr) << 12)); - uint64_t pde_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, ((u64)pde_0.addr) << 12); - uint32_t pde_2 = readl(g->bar3 + pde_bar2_vaddr); - printk(KERN_INFO "[nvdebug] PDE0 via PRAMIN: %x, via BAR3: %x\n", pde_1, pde_2); - */ - if (!g->bar3) { + if (!g->bar2) { printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n"); return -ENODEV; } rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr; } else { // Directly access the runlist if stored in SYS_MEM (physically addressed) - rl_iter->curr_entry = phys_to_virt(runlist_iova); + // XXX: SYS_MEM is an IOMMU address on some platforms, causing this to crash + 
rl_iter->curr_entry = (void*)phys_to_virt(runlist_iova); } - rl_iter->rl_info = rl_info; + rl_iter->len = runlist_len; return 0; + attempt_pramin_access: #ifdef FALLBACK_TO_PRAMIN printk(KERN_INFO "[nvdebug] Attempting to move PRAMIN window to runlist as BAR2/3-based access failed [DANGEROUS SIDE EFFECTS]!\n"); - bar0_window_t win; - win.base = (runlist_iova >> 16); - win.target = TARGET_VID_MEM; - // Shift PRAMIN window. This will cause problems if it races with driver code - // that tries to do the same, or expects the window not to move. - nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, win.raw); - uint32_t off = vram2PRAMIN(g, runlist_iova); - // Workaround bug for if `off` should be zero (vram2PRAMIN normally returns - // this on error) - if (!off && (runlist_iova & 0xffffull != runlist_iova)) { - printk(KERN_INFO "[nvdebug] Unable to shift PRAMIN to runlist. Aborting...\n"); - return -EOPNOTSUPP; - } + if ((off = addr_to_pramin_mut(g, runlist_iova, runlist_target)) == -1) + return off; rl_iter->curr_entry = g->regs + NV_PRAMIN + off; - rl_iter->rl_info = rl_info; + rl_iter->len = runlist_len; return 0; #else return -EOPNOTSUPP; #endif // FALLBACK_TO_PRAMIN } +/* Trigger a preempt of the specified TSG + @param tsg_id ID of TSG to preempt. + @return 0 or -errno on error + + Note: If no other TSGs exist in the associated runlist, this TSG may + continue executing, unless NV_PFIFO_SCHED_DISABLE is set, or all the + channels of the TSG to be preempted are disabled. +*/ int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) { - runlist_info_t rl_info; pfifo_preempt_t pfifo_preempt; - runlist_disable_t rl_disable; - if (!g) - return -EIO; - rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); + if (g->chip_id < NV_CHIP_ID_KEPLER) + return -EOPNOTSUPP; + + pfifo_preempt.raw = 0; pfifo_preempt.id = tsg_id; pfifo_preempt.is_pending = 0; pfifo_preempt.type = PREEMPT_TYPE_TSG; - // There may be a bug (?) 
that requires us to disable scheduling before preempting - rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); - rl_disable.raw |= BIT(rl_info.id); // Disable runlist rl_info.id - nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); + // Actually trigger the preemption nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw); - // Renable scheduling - rl_disable.raw &= ~BIT(rl_info.id); // Enable runlist rl_info.id - nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); + return 0; +} + +/* Trigger a preempt of the specified runlist + @param rl_id ID of runlist to preempt. + @return 0 or -errno on error +*/ +int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { + runlist_preempt_t rl_preempt; + if (g->chip_id < NV_CHIP_ID_VOLTA) + return -EOPNOTSUPP; - printk(KERN_INFO "[nvdebug] TSG %d preempted (runlist %d)\n", tsg_id, rl_info.id); + // Overwrite, as the register contains nothing to preserve + rl_preempt.raw = BIT(rl_id); + nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); return 0; } diff --git a/runlist_procfs.c b/runlist_procfs.c index f7f937d..7dedee3 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c @@ -69,12 +69,12 @@ static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) { if (err) return ERR_PTR(err); // Don't try to print an empty runlist - if (rl_iter.rl_info.len <= 0) + if (rl_iter.len <= 0) return NULL; return &rl_iter; } // If we're resuming an earlier print - if (*pos < rl_iter.rl_info.len) { + if (*pos < rl_iter.len) { #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) // There's a nasty bug prior to 4.19-rc1 that if the buffer overflows, the // last update to `pos` is not saved. 
Work around that here by reloading a @@ -98,14 +98,16 @@ static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter, (*pos)++; rl_iter->curr_entry += NV_RL_ENTRY_SIZE(g); // Verify we haven't reached the end of the runlist - // rl_info.len is the num of tsg entries + total num of channel entries - if (*pos < rl_iter->rl_info.len) { + // len is the num of tsg entries + total num of channel entries + if (*pos < rl_iter->len) { ret = rl_iter; } #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) // Bug workaround. See comment in runlist_file_seq_start() pos_fixup = ret ? *pos : 0; #endif + if (rl_iter->entries_left_in_tsg) + rl_iter->entries_left_in_tsg--; return ret; } @@ -113,17 +115,19 @@ static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) { // No cleanup needed } +// _show() must be idempotent. This function will be rerun if the seq_printf +// buffer was too small. static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { struct runlist_iter *rl_iter = raw_rl_iter; void *entry = rl_iter->curr_entry; struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)]; if (entry_type(g, entry) == ENTRY_TYPE_TSG) { - if (rl_iter->channels_left_in_tsg) { - printk(KERN_WARNING "[nvdebug] Found TSG ID%d @ %px when %d channels were still expected under the previous TSG in the runlist!\n", tsgid(g, entry), entry, rl_iter->channels_left_in_tsg); - while (rl_iter->channels_left_in_tsg--) + if (rl_iter->entries_left_in_tsg) { + printk(KERN_WARNING "[nvdebug] Found TSG ID%d @ %px when %d channels were still expected under the previous TSG in the runlist!\n", tsgid(g, entry), entry, rl_iter->entries_left_in_tsg); + while (rl_iter->entries_left_in_tsg--) seq_printf(s, "[missing channel]\n"); } - rl_iter->channels_left_in_tsg = tsg_length(g, entry); + rl_iter->entries_left_in_tsg = tsg_length(g, entry) + 1; seq_printf(s, "+---- TSG Entry %-3d---+\n", tsgid(g, entry)); seq_printf(s, "| Scale: %-13d|\n", timeslice_scale(g, entry)); 
seq_printf(s, "| Timeout: %-11d|\n", timeslice_timeout(g, entry)); @@ -134,10 +138,8 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { #ifndef DETAILED_CHANNEL_INFO u64 instance_ptr = 0; #endif - if (rl_iter->channels_left_in_tsg) { + if (rl_iter->entries_left_in_tsg) indt = " "; - rl_iter->channels_left_in_tsg--; - } #ifdef DETAILED_CHANNEL_INFO runlist_detail_seq_show_chan(s, g, chid(g, entry), indt); #else @@ -193,8 +195,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, return -ERANGE; // Execute preemption - err = preempt_tsg(g, target_tsgid); - if (err) + if ((err = preempt_tsg(g, target_tsgid))) return err; return count; @@ -210,8 +211,6 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, uint32_t target_channel; channel_ctrl_t chan; int err; - runlist_info_t rl_info; - runlist_disable_t rl_disable; struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec err = kstrtou32_from_user(buffer, count, 0, &target_channel); @@ -221,19 +220,12 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, if (target_channel > MAX_CHID) return -ERANGE; - // Disable channel - chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); + // Read current configuration + if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) + return -EIO; + // Request disablement chan.enable_clear = true; - // disable sched - rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); - rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); - rl_disable.raw |= BIT(rl_info.id); - nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); - // disable chan nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); - // enable sched - rl_disable.raw &= ~BIT(rl_info.id); - nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); return count; } @@ -270,6 +262,7 @@ struct 
file_operations enable_channel_file_ops = { .llseek = default_llseek, }; +// Note: Operates only on runlist 0 (Compute/Graphics) ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, size_t count, loff_t *off) { uint32_t target_tsgid; @@ -292,7 +285,7 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, return err; // Iterate through all TSGs - while (pos < rl_iter.rl_info.len) { + while (pos < rl_iter.len) { if (tsgid(g, rl_iter.curr_entry) == target_tsgid) { // Enable channels of target TSG for_chan_in_tsg(g, chan, rl_iter.curr_entry) { @@ -313,9 +306,8 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, pos += 1 + tsg_length(g, rl_iter.curr_entry); rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry); } - // Switch to next TSG with active channels (should be our TSG) - err = preempt_tsg(g, target_tsgid); - if (err) + // Trigger a runlist-level preempt to switch to `target_tsgid` + if ((err = preempt_runlist(g, 0))) return err; return count; -- cgit v1.2.2