/* Copyright 2021 Joshua Bakita * SPDX-License-Identifier: MIT */ /* TODO * - Add /proc /sys or debugfs interface * - Add API to trigger a preemption */ #include #include #include #include #include // For struct iommu_domain #include /* Currently used symbols: * - struct gk20a; * - struct nvgpu_os_linux; * - void nvgpu_writel(struct gk20a *g, u32 reg_addr, u32 value); */ #include #include #include #include "nvdebug.h" MODULE_LICENSE("GPL"); // LIAR MODULE_AUTHOR("Joshua Bakita"); MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now // Bus types are global symbols in the kernel extern struct bus_type platform_bus_type; static inline struct gk20a *get_gk20a(struct device *dev) { // XXX: Only works because gk20a* is the first member of gk20a_platform return *((struct gk20a**)dev_get_drvdata(dev)); } // Functionally identical to nvgpu_readl() // (except we don't try to resolve situations where regs is NULL) static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); if (unlikely(!g_os->regs)) { printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); return -1; } return readl(g_os->regs + r); } // Functionally identical to nvgpu_writel() static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); if (unlikely(!g_os->regs)) { printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); return; } writel_relaxed(v, g_os->regs + r); wmb(); } /* #define RUNLIST_PROCFS_NAME "runlist" static const struct seq_operations runlist_file_seq_ops = { .start = .next = .stop = .show = }; static const struct file_operations runlist_file_ops = { .read = */ /*static void read_bytes(struct gk20a *g, void* target, u32 start, u32 num_bytes) { u32 *output = target; u32 i; // Read u32s from the GPU for (i = 0; i < num_bytes; i += 4) { output[i/4] = _nvgpu_readl(g, start + i); printk(KERN_INFO "[nvdebug] U32 %d: %0x\n", i, output[i/4]); } } static void read_bytes(void* target, void* start, u32 num_bytes) { u32 *output = target; u32 i; // Read u32s from the GPU for (i = 0; i < num_bytes; i += 4) { output[i/4] = readl(start + i); printk(KERN_INFO "[nvdebug] U32 %d: %0x\n", i, output[i/4]); } }*/ /* +---- TSG Entry %d ----+ | Scale: %d | | Timeout: %d | +----------------------+ */ #define PRE KERN_INFO "[nvdebug] " static void nvdebug_print_tsg(struct entry_tsg* tsg) { if (tsg->entry_type != ENTRY_TYPE_TSG) { printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in nvdebug_print_tsg()!\n"); return; } printk(PRE "+---- TSG Entry %-2d----+", tsg->tsgid); printk(PRE "| Scale: %-13d|", tsg->timeslice_scale); printk(PRE "| Timeout: %-11d|", tsg->timeslice_timeout); printk(PRE "+---------------------+"); } static void nvdebug_print_chan(struct runlist_chan* chan) { char* loc_txt; u64 inst_ptr; if (chan->entry_type != ENTRY_TYPE_CHAN) { printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in nvdebug_print_channel()!\n"); return; } switch (chan->inst_target) { case TARGET_VID_MEM: loc_txt = "VID_MEM"; break; case TARGET_SYS_MEM_COHERENT: loc_txt = "SYS_MEM_COHERENT"; break; case TARGET_SYS_MEM_NONCOHERENT: loc_txt = "SYS_MEM_NONCOHERENT"; break; default: printk(KERN_WARNING "[nvdebug] Invalid aperture in runlist channel!\n"); return; } // Reconstruct pointer to channel instance block inst_ptr = chan->inst_ptr_hi; inst_ptr <<= 32; inst_ptr |= chan->inst_ptr_lo << 12; printk(PRE " +- Channel Entry %-4d-+", chan->chid); printk(PRE " | Runqueue Selector: %d|", chan->runqueue_selector); printk(PRE " | Instance PTR: |"); printk(PRE " | %#018llx |", inst_ptr); printk(PRE " | %-20s|", loc_txt); printk(PRE " +---------------------+"); } #define for_chan_in_tsg(chan, tsg) \ for (chan = (struct runlist_chan*)(tsg + 1); \ (void*)chan < (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length; \ chan++) #define next_tsg(tsg) \ (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length static void nvdebug_print_runlist(struct entry_tsg* head, runlist_info_t rl_info) { int rl_idx = 0; struct runlist_chan* chan; printk(PRE "tsg->tsg_length: %d\n", head->tsg_length); printk(PRE "rl_info.len: %d\n", rl_info.len); while (rl_idx < rl_info.len) { nvdebug_print_tsg(head); for_chan_in_tsg(chan, head) { nvdebug_print_chan(chan); } rl_idx += 1 + head->tsg_length; head = next_tsg(head); } } static int __init nvdebug_init(void) { struct device *dev = NULL; struct device *temp_dev; struct gk20a *g; struct entry_tsg head; runlist_base_t rl_base; runlist_info_t rl_info; u64 runlist_iova; // Get the last device that matches our name while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { printk(KERN_INFO "Found a matching device\n"); dev = temp_dev; } if (!dev) return -EIO; g = get_gk20a(dev); // This address seems to not be: // - A GPU address (type is sysmem_coherent) // - A physical address (dereferencing after ioremap crashes) // - A kernel virtual address (dereferencing segfaults) // So maybe it's some sort of custom thing? This is an address that the GPU // can use, so it would make most sense for it to be a physical address. // // BUT, it can't possibly be a physical address, as it would refer to an // address greater than the maximum one on our system (by a lot!). // Maybe I'm reading the runlist base wrong? // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual // address! So, what's this I/O address space? All I know is that it's what // nvgpu_mem_get_addr() returns. That function returns the result of either: // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) // converts an IPA to a PA? // - nvgpu_mem_iommu_translate // // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which // returns SYSMEM. // // To convert a physical address to a IOMMU address, we add a bit // // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working // before because the GPU had simply gone to sleep and invalidated its // register state, so nvgpu_readl() was simply returning garbage. printk(KERN_INFO "[nvdebug] Pulling runlist base address from %x\n", NV_PFIFO_RUNLIST_BASE); printk(KERN_INFO "[nvdebug] Using struct gk20a* of %px\n", g); printk(KERN_INFO "[nvdebug] g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n", g->name, g->power_on, g->sw_ready, g->is_virtual); struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g); printk(KERN_INFO "[nvdebug] l->regs %px, l->regs_saved %px\n", l->regs, l->regs_saved); if (!l->regs) return -EIO; rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); runlist_iova = ((u64)rl_base.ptr) << 12; printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); // Segfaults //u32 attempted_read = ioread32(runlist_iova); //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read); // Errors out //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg)); //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr); /* Overcomplicated? struct iommu_domain *domain = iommu_get_domain_for_dev(dev); if (!domain) { printk(KERN_INFO "[nvdebug] No IOMMU domain!\n"); return -EIO; } u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova); printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr); */ printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova)); printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova)); head = *(struct entry_tsg*)phys_to_virt(runlist_iova); nvdebug_print_runlist((struct entry_tsg*)phys_to_virt(runlist_iova), rl_info); //nvdebug_print_tsg(&head); //nvdebug_print_chan((struct runlist_chan*)(phys_to_virt(runlist_iova) + sizeof(struct entry_tsg))); //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL)); //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL)); //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg)); /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */return 0; } static void __exit nvdebug_exit(void) { printk(KERN_INFO "[nvdebug] Exiting...\n"); } module_init(nvdebug_init); module_exit(nvdebug_exit);