#include // For struct device, bus_find_device*() //#include // For struct iommu_domain #include // Kernel types #include #include "nvdebug.h" // Bus types are global symbols in the kernel extern struct bus_type platform_bus_type; struct gk20a* get_live_gk20a(void) { struct device *dev = NULL; struct device *temp_dev; struct gk20a *g; struct nvgpu_os_linux *l; // Get the last device that matches our name while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { dev = temp_dev; printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev)); } if (!dev) return NULL; g = get_gk20a(dev); // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be: // - A GPU address (type is sysmem_coherent) // - A physical address (dereferencing after ioremap crashes) // - A kernel virtual address (dereferencing segfaults) // So maybe it's some sort of custom thing? This is an address that the GPU // can use, so it would make most sense for it to be a physical address. // // BUT, it can't possibly be a physical address, as it would refer to an // address greater than the maximum one on our system (by a lot!). // Maybe I'm reading the runlist base wrong? // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual // address! So, what's this I/O address space? All I know is that it's what // nvgpu_mem_get_addr() returns. That function returns the result of either: // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) // converts an IPA to a PA? // - nvgpu_mem_iommu_translate // // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which // returns SYSMEM. // // To convert a physical address to a IOMMU address, we add a bit // // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working // before because the GPU had simply gone to sleep and invalidated its // register state, so nvgpu_readl() was simply returning garbage. l = container_of(g, struct nvgpu_os_linux, g); if (!l->regs) return NULL; return g; } /* Get runlist head and info (incl. length) @param rl_iter Location at which to store output */ int get_runlist_iter(struct runlist_iter *rl_iter) { struct entry_tsg head; runlist_base_t rl_base; runlist_info_t rl_info; u64 runlist_iova; struct gk20a *g = get_live_gk20a(); if (!g) return -EIO; rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); runlist_iova = ((u64)rl_base.ptr) << 12; printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); // Segfaults //u32 attempted_read = ioread32(runlist_iova); //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read); // Errors out //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg)); //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr); /* Overcomplicated? struct iommu_domain *domain = iommu_get_domain_for_dev(dev); if (!domain) { printk(KERN_INFO "[nvdebug] No IOMMU domain!\n"); return -EIO; } u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova); printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr); */ printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova)); printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova)); head = *(struct entry_tsg*)phys_to_virt(runlist_iova); rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova); rl_iter->rl_info = rl_info; return 0; //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL)); //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL)); //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg)); /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */ }