From ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Mon, 16 Sep 2024 15:53:23 -0400 Subject: Cleanup in nvdebug_entry.c - Fix pointer corruption when `compat_ops()` is called more than once on the same struct. - Add support for detecting Jetson Orin on newer releases of L4T. - Reorder some initialization steps such that the order matches for both PCIe- and platform-bus devices. - Remove a duplicate check in `nvdebug_exit()`. Tested on ga10b (Jetson Orin) with L4T r36.3. --- nvdebug_entry.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 1f9e1c9..5f99976 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -25,9 +25,9 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); // runlist_procfs.c extern struct file_operations runlist_file_ops; extern struct file_operations preempt_tsg_file_ops; -extern struct file_operations resubmit_runlist_file_ops; extern struct file_operations disable_channel_file_ops; extern struct file_operations enable_channel_file_ops; +extern struct file_operations resubmit_runlist_file_ops; extern struct file_operations switch_to_tsg_file_ops; // device_info_procfs.c extern struct file_operations device_info_file_ops; @@ -46,15 +46,22 @@ extern struct bus_type platform_bus_type; // As file_operations is larger than proc_ops, we can overwrite the memory // backing the file_operations struct to follow the proc_ops layout, and then // cast on newer kernels. +// We use the last byte of the file_operations struct to flag that the memory +// layout has been rearranged. #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) const struct proc_ops* compat_ops(const struct file_operations* ops) { struct proc_ops new_ops = {}; + // Don't re-layout if it's already been done + if (*((uint8_t*)(ops + 1) - 1)) + return (struct proc_ops*)ops; new_ops.proc_open = ops->open; new_ops.proc_read = ops->read; new_ops.proc_write = ops->write; new_ops.proc_lseek = ops->llseek; new_ops.proc_release = ops->release; memcpy((void*)ops, &new_ops, sizeof(new_ops)); + // Flag re-layout as complete in last byte of structure + *((uint8_t*)(ops + 1) - 1) = 1; return (struct proc_ops*)ops; } #else @@ -89,6 +96,9 @@ int probe_and_cache_devices(void) { }; int i = 0; // Search the platform bus for the first device that matches our name + // Search for embedded GPU on Jetson (generic name starting around L4T 36.3) + while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gpu"))) + dev = temp_dev; // Search for GA10B (Jetson Orin) while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) dev = temp_dev; @@ -112,15 +122,15 @@ int probe_and_cache_devices(void) { if (ids.raw == -1) return -EADDRNOTAVAIL; g_nvdebug_state[i].chip_id = ids.chip_id; - g_nvdebug_state[i].pcid = NULL; g_nvdebug_state[i].bar3 = NULL; + g_nvdebug_state[i].pcid = NULL; g_nvdebug_state[i].dev = dev; printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", ids.chip_id, ARCH2NAME(ids.architecture)); i++; } // Search the PCI bus and iterate through all matches - // FIXME: State rollback + // FIXME: Undo the pci_iomap() if this fails while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { mc_boot_0_t ids; g_nvdebug_state[i].g = NULL; @@ -130,20 +140,20 @@ int probe_and_cache_devices(void) { pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); return -EADDRNOTAVAIL; } + ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); + if (ids.raw == -1) { + pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); + return -EADDRNOTAVAIL; + } + g_nvdebug_state[i].chip_id = ids.chip_id; // Map BAR3 (CPU-accessible mappings of GPU DRAM) g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); - // Try mapping only the lower half of BAR3 on fail + // XXX: Try mapping only the lower half of BAR3 on fail // (vesafb may map the top half for display) if (!g_nvdebug_state[i].bar3) g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); g_nvdebug_state[i].pcid = pcid; g_nvdebug_state[i].dev = &pcid->dev; - ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); - if (ids.raw == -1) { - pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); - return -EADDRNOTAVAIL; - } - g_nvdebug_state[i].chip_id = ids.chip_id; printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", ids.chip_id, ARCH2NAME(ids.architecture)); #if INTERRUPT_DEBUG @@ -397,12 +407,12 @@ static void __exit nvdebug_exit(void) { char device_id[7]; snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); remove_proc_subtree(device_id, NULL); - // Free BAR mappings for PCIe devices g = &g_nvdebug_state[g_nvdebug_devices]; + // Free BAR mappings for PCIe devices if (g && g->pcid) { - if (g && g->regs) + if (g->regs) pci_iounmap(g->pcid, g->regs); - if (g && g->bar2) + if (g->bar2) pci_iounmap(g->pcid, g->bar2); #if INTERRUPT_DEBUG free_irq(g->pcid->irq, g->pcid); -- cgit v1.2.2