diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:53:23 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:57:36 -0400 |
| commit | ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (patch) | |
| tree | 0f73a6a9f3fd15e2286fd7790804c2a06c7cba57 | |
| parent | 232eafd04f272ed69d97a250c50a7bbed4d2894c (diff) | |
Cleanup in nvdebug_entry.c
- Fix pointer corruption when `compat_ops()` is called more than
once on the same struct.
- Add support for detecting Jetson Orin on newer releases of L4T.
- Reorder some initialization steps such that the order matches for
both PCIe- and platform-bus devices.
- Remove a duplicate check in `nvdebug_exit()`.
Tested on ga10b (Jetson Orin) with L4T r36.3.
| -rw-r--r-- | nvdebug_entry.c | 36 |
1 file changed, 23 insertions, 13 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 1f9e1c9..5f99976 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
| @@ -25,9 +25,9 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | |||
| 25 | // runlist_procfs.c | 25 | // runlist_procfs.c |
| 26 | extern struct file_operations runlist_file_ops; | 26 | extern struct file_operations runlist_file_ops; |
| 27 | extern struct file_operations preempt_tsg_file_ops; | 27 | extern struct file_operations preempt_tsg_file_ops; |
| 28 | extern struct file_operations resubmit_runlist_file_ops; | ||
| 29 | extern struct file_operations disable_channel_file_ops; | 28 | extern struct file_operations disable_channel_file_ops; |
| 30 | extern struct file_operations enable_channel_file_ops; | 29 | extern struct file_operations enable_channel_file_ops; |
| 30 | extern struct file_operations resubmit_runlist_file_ops; | ||
| 31 | extern struct file_operations switch_to_tsg_file_ops; | 31 | extern struct file_operations switch_to_tsg_file_ops; |
| 32 | // device_info_procfs.c | 32 | // device_info_procfs.c |
| 33 | extern struct file_operations device_info_file_ops; | 33 | extern struct file_operations device_info_file_ops; |
| @@ -46,15 +46,22 @@ extern struct bus_type platform_bus_type; | |||
| 46 | // As file_operations is larger than proc_ops, we can overwrite the memory | 46 | // As file_operations is larger than proc_ops, we can overwrite the memory |
| 47 | // backing the file_operations struct to follow the proc_ops layout, and then | 47 | // backing the file_operations struct to follow the proc_ops layout, and then |
| 48 | // cast on newer kernels. | 48 | // cast on newer kernels. |
| 49 | // We use the last byte of the file_operations struct to flag that the memory | ||
| 50 | // layout has been rearranged. | ||
| 49 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | 51 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) |
| 50 | const struct proc_ops* compat_ops(const struct file_operations* ops) { | 52 | const struct proc_ops* compat_ops(const struct file_operations* ops) { |
| 51 | struct proc_ops new_ops = {}; | 53 | struct proc_ops new_ops = {}; |
| 54 | // Don't re-layout if it's already been done | ||
| 55 | if (*((uint8_t*)(ops + 1) - 1)) | ||
| 56 | return (struct proc_ops*)ops; | ||
| 52 | new_ops.proc_open = ops->open; | 57 | new_ops.proc_open = ops->open; |
| 53 | new_ops.proc_read = ops->read; | 58 | new_ops.proc_read = ops->read; |
| 54 | new_ops.proc_write = ops->write; | 59 | new_ops.proc_write = ops->write; |
| 55 | new_ops.proc_lseek = ops->llseek; | 60 | new_ops.proc_lseek = ops->llseek; |
| 56 | new_ops.proc_release = ops->release; | 61 | new_ops.proc_release = ops->release; |
| 57 | memcpy((void*)ops, &new_ops, sizeof(new_ops)); | 62 | memcpy((void*)ops, &new_ops, sizeof(new_ops)); |
| 63 | // Flag re-layout as complete in last byte of structure | ||
| 64 | *((uint8_t*)(ops + 1) - 1) = 1; | ||
| 58 | return (struct proc_ops*)ops; | 65 | return (struct proc_ops*)ops; |
| 59 | } | 66 | } |
| 60 | #else | 67 | #else |
| @@ -89,6 +96,9 @@ int probe_and_cache_devices(void) { | |||
| 89 | }; | 96 | }; |
| 90 | int i = 0; | 97 | int i = 0; |
| 91 | // Search the platform bus for the first device that matches our name | 98 | // Search the platform bus for the first device that matches our name |
| 99 | // Search for embedded GPU on Jetson (generic name starting around L4T 36.3) | ||
| 100 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gpu"))) | ||
| 101 | dev = temp_dev; | ||
| 92 | // Search for GA10B (Jetson Orin) | 102 | // Search for GA10B (Jetson Orin) |
| 93 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) | 103 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) |
| 94 | dev = temp_dev; | 104 | dev = temp_dev; |
| @@ -112,15 +122,15 @@ int probe_and_cache_devices(void) { | |||
| 112 | if (ids.raw == -1) | 122 | if (ids.raw == -1) |
| 113 | return -EADDRNOTAVAIL; | 123 | return -EADDRNOTAVAIL; |
| 114 | g_nvdebug_state[i].chip_id = ids.chip_id; | 124 | g_nvdebug_state[i].chip_id = ids.chip_id; |
| 115 | g_nvdebug_state[i].pcid = NULL; | ||
| 116 | g_nvdebug_state[i].bar3 = NULL; | 125 | g_nvdebug_state[i].bar3 = NULL; |
| 126 | g_nvdebug_state[i].pcid = NULL; | ||
| 117 | g_nvdebug_state[i].dev = dev; | 127 | g_nvdebug_state[i].dev = dev; |
| 118 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", | 128 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", |
| 119 | ids.chip_id, ARCH2NAME(ids.architecture)); | 129 | ids.chip_id, ARCH2NAME(ids.architecture)); |
| 120 | i++; | 130 | i++; |
| 121 | } | 131 | } |
| 122 | // Search the PCI bus and iterate through all matches | 132 | // Search the PCI bus and iterate through all matches |
| 123 | // FIXME: State rollback | 133 | // FIXME: Undo the pci_iomap() if this fails |
| 124 | while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { | 134 | while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { |
| 125 | mc_boot_0_t ids; | 135 | mc_boot_0_t ids; |
| 126 | g_nvdebug_state[i].g = NULL; | 136 | g_nvdebug_state[i].g = NULL; |
| @@ -130,20 +140,20 @@ int probe_and_cache_devices(void) { | |||
| 130 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); | 140 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); |
| 131 | return -EADDRNOTAVAIL; | 141 | return -EADDRNOTAVAIL; |
| 132 | } | 142 | } |
| 143 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
| 144 | if (ids.raw == -1) { | ||
| 145 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
| 146 | return -EADDRNOTAVAIL; | ||
| 147 | } | ||
| 148 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
| 133 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) | 149 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) |
| 134 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); | 150 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); |
| 135 | // Try mapping only the lower half of BAR3 on fail | 151 | // XXX: Try mapping only the lower half of BAR3 on fail |
| 136 | // (vesafb may map the top half for display) | 152 | // (vesafb may map the top half for display) |
| 137 | if (!g_nvdebug_state[i].bar3) | 153 | if (!g_nvdebug_state[i].bar3) |
| 138 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | 154 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); |
| 139 | g_nvdebug_state[i].pcid = pcid; | 155 | g_nvdebug_state[i].pcid = pcid; |
| 140 | g_nvdebug_state[i].dev = &pcid->dev; | 156 | g_nvdebug_state[i].dev = &pcid->dev; |
| 141 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
| 142 | if (ids.raw == -1) { | ||
| 143 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
| 144 | return -EADDRNOTAVAIL; | ||
| 145 | } | ||
| 146 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
| 147 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", | 157 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", |
| 148 | ids.chip_id, ARCH2NAME(ids.architecture)); | 158 | ids.chip_id, ARCH2NAME(ids.architecture)); |
| 149 | #if INTERRUPT_DEBUG | 159 | #if INTERRUPT_DEBUG |
| @@ -397,12 +407,12 @@ static void __exit nvdebug_exit(void) { | |||
| 397 | char device_id[7]; | 407 | char device_id[7]; |
| 398 | snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); | 408 | snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); |
| 399 | remove_proc_subtree(device_id, NULL); | 409 | remove_proc_subtree(device_id, NULL); |
| 400 | // Free BAR mappings for PCIe devices | ||
| 401 | g = &g_nvdebug_state[g_nvdebug_devices]; | 410 | g = &g_nvdebug_state[g_nvdebug_devices]; |
| 411 | // Free BAR mappings for PCIe devices | ||
| 402 | if (g && g->pcid) { | 412 | if (g && g->pcid) { |
| 403 | if (g && g->regs) | 413 | if (g->regs) |
| 404 | pci_iounmap(g->pcid, g->regs); | 414 | pci_iounmap(g->pcid, g->regs); |
| 405 | if (g && g->bar2) | 415 | if (g->bar2) |
| 406 | pci_iounmap(g->pcid, g->bar2); | 416 | pci_iounmap(g->pcid, g->bar2); |
| 407 | #if INTERRUPT_DEBUG | 417 | #if INTERRUPT_DEBUG |
| 408 | free_irq(g->pcid->irq, g->pcid); | 418 | free_irq(g->pcid->irq, g->pcid); |
