diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:53:23 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:57:36 -0400 |
commit | ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (patch) | |
tree | 0f73a6a9f3fd15e2286fd7790804c2a06c7cba57 | |
parent | 232eafd04f272ed69d97a250c50a7bbed4d2894c (diff) |
Cleanup in nvdebug_entry.c

- Fix pointer corruption when `compat_ops()` is called more than
  once on the same struct.
- Add support for detecting Jetson Orin on newer releases of L4T.
- Reorder some initialization steps such that the order matches for
  both PCIe- and platform-bus devices.
- Remove a duplicate check in `nvdebug_exit()`.

Tested on ga10b (Jetson Orin) with L4T r36.3.
-rw-r--r-- | nvdebug_entry.c | 36 |
1 file changed, 23 insertions, 13 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 1f9e1c9..5f99976 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -25,9 +25,9 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | |||
25 | // runlist_procfs.c | 25 | // runlist_procfs.c |
26 | extern struct file_operations runlist_file_ops; | 26 | extern struct file_operations runlist_file_ops; |
27 | extern struct file_operations preempt_tsg_file_ops; | 27 | extern struct file_operations preempt_tsg_file_ops; |
28 | extern struct file_operations resubmit_runlist_file_ops; | ||
29 | extern struct file_operations disable_channel_file_ops; | 28 | extern struct file_operations disable_channel_file_ops; |
30 | extern struct file_operations enable_channel_file_ops; | 29 | extern struct file_operations enable_channel_file_ops; |
30 | extern struct file_operations resubmit_runlist_file_ops; | ||
31 | extern struct file_operations switch_to_tsg_file_ops; | 31 | extern struct file_operations switch_to_tsg_file_ops; |
32 | // device_info_procfs.c | 32 | // device_info_procfs.c |
33 | extern struct file_operations device_info_file_ops; | 33 | extern struct file_operations device_info_file_ops; |
@@ -46,15 +46,22 @@ extern struct bus_type platform_bus_type; | |||
46 | // As file_operations is larger than proc_ops, we can overwrite the memory | 46 | // As file_operations is larger than proc_ops, we can overwrite the memory |
47 | // backing the file_operations struct to follow the proc_ops layout, and then | 47 | // backing the file_operations struct to follow the proc_ops layout, and then |
48 | // cast on newer kernels. | 48 | // cast on newer kernels. |
49 | // We use the last byte of the file_operations struct to flag that the memory | ||
50 | // layout has been rearranged. | ||
49 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | 51 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) |
50 | const struct proc_ops* compat_ops(const struct file_operations* ops) { | 52 | const struct proc_ops* compat_ops(const struct file_operations* ops) { |
51 | struct proc_ops new_ops = {}; | 53 | struct proc_ops new_ops = {}; |
54 | // Don't re-layout if it's already been done | ||
55 | if (*((uint8_t*)(ops + 1) - 1)) | ||
56 | return (struct proc_ops*)ops; | ||
52 | new_ops.proc_open = ops->open; | 57 | new_ops.proc_open = ops->open; |
53 | new_ops.proc_read = ops->read; | 58 | new_ops.proc_read = ops->read; |
54 | new_ops.proc_write = ops->write; | 59 | new_ops.proc_write = ops->write; |
55 | new_ops.proc_lseek = ops->llseek; | 60 | new_ops.proc_lseek = ops->llseek; |
56 | new_ops.proc_release = ops->release; | 61 | new_ops.proc_release = ops->release; |
57 | memcpy((void*)ops, &new_ops, sizeof(new_ops)); | 62 | memcpy((void*)ops, &new_ops, sizeof(new_ops)); |
63 | // Flag re-layout as complete in last byte of structure | ||
64 | *((uint8_t*)(ops + 1) - 1) = 1; | ||
58 | return (struct proc_ops*)ops; | 65 | return (struct proc_ops*)ops; |
59 | } | 66 | } |
60 | #else | 67 | #else |
@@ -89,6 +96,9 @@ int probe_and_cache_devices(void) { | |||
89 | }; | 96 | }; |
90 | int i = 0; | 97 | int i = 0; |
91 | // Search the platform bus for the first device that matches our name | 98 | // Search the platform bus for the first device that matches our name |
99 | // Search for embedded GPU on Jetson (generic name starting around L4T 36.3) | ||
100 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gpu"))) | ||
101 | dev = temp_dev; | ||
92 | // Search for GA10B (Jetson Orin) | 102 | // Search for GA10B (Jetson Orin) |
93 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) | 103 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) |
94 | dev = temp_dev; | 104 | dev = temp_dev; |
@@ -112,15 +122,15 @@ int probe_and_cache_devices(void) { | |||
112 | if (ids.raw == -1) | 122 | if (ids.raw == -1) |
113 | return -EADDRNOTAVAIL; | 123 | return -EADDRNOTAVAIL; |
114 | g_nvdebug_state[i].chip_id = ids.chip_id; | 124 | g_nvdebug_state[i].chip_id = ids.chip_id; |
115 | g_nvdebug_state[i].pcid = NULL; | ||
116 | g_nvdebug_state[i].bar3 = NULL; | 125 | g_nvdebug_state[i].bar3 = NULL; |
126 | g_nvdebug_state[i].pcid = NULL; | ||
117 | g_nvdebug_state[i].dev = dev; | 127 | g_nvdebug_state[i].dev = dev; |
118 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", | 128 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", |
119 | ids.chip_id, ARCH2NAME(ids.architecture)); | 129 | ids.chip_id, ARCH2NAME(ids.architecture)); |
120 | i++; | 130 | i++; |
121 | } | 131 | } |
122 | // Search the PCI bus and iterate through all matches | 132 | // Search the PCI bus and iterate through all matches |
123 | // FIXME: State rollback | 133 | // FIXME: Undo the pci_iomap() if this fails |
124 | while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { | 134 | while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { |
125 | mc_boot_0_t ids; | 135 | mc_boot_0_t ids; |
126 | g_nvdebug_state[i].g = NULL; | 136 | g_nvdebug_state[i].g = NULL; |
@@ -130,20 +140,20 @@ int probe_and_cache_devices(void) { | |||
130 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); | 140 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); |
131 | return -EADDRNOTAVAIL; | 141 | return -EADDRNOTAVAIL; |
132 | } | 142 | } |
143 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
144 | if (ids.raw == -1) { | ||
145 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
146 | return -EADDRNOTAVAIL; | ||
147 | } | ||
148 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
133 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) | 149 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) |
134 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); | 150 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); |
135 | // Try mapping only the lower half of BAR3 on fail | 151 | // XXX: Try mapping only the lower half of BAR3 on fail |
136 | // (vesafb may map the top half for display) | 152 | // (vesafb may map the top half for display) |
137 | if (!g_nvdebug_state[i].bar3) | 153 | if (!g_nvdebug_state[i].bar3) |
138 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | 154 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); |
139 | g_nvdebug_state[i].pcid = pcid; | 155 | g_nvdebug_state[i].pcid = pcid; |
140 | g_nvdebug_state[i].dev = &pcid->dev; | 156 | g_nvdebug_state[i].dev = &pcid->dev; |
141 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
142 | if (ids.raw == -1) { | ||
143 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
144 | return -EADDRNOTAVAIL; | ||
145 | } | ||
146 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
147 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", | 157 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", |
148 | ids.chip_id, ARCH2NAME(ids.architecture)); | 158 | ids.chip_id, ARCH2NAME(ids.architecture)); |
149 | #if INTERRUPT_DEBUG | 159 | #if INTERRUPT_DEBUG |
@@ -397,12 +407,12 @@ static void __exit nvdebug_exit(void) { | |||
397 | char device_id[7]; | 407 | char device_id[7]; |
398 | snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); | 408 | snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); |
399 | remove_proc_subtree(device_id, NULL); | 409 | remove_proc_subtree(device_id, NULL); |
400 | // Free BAR mappings for PCIe devices | ||
401 | g = &g_nvdebug_state[g_nvdebug_devices]; | 410 | g = &g_nvdebug_state[g_nvdebug_devices]; |
411 | // Free BAR mappings for PCIe devices | ||
402 | if (g && g->pcid) { | 412 | if (g && g->pcid) { |
403 | if (g && g->regs) | 413 | if (g->regs) |
404 | pci_iounmap(g->pcid, g->regs); | 414 | pci_iounmap(g->pcid, g->regs); |
405 | if (g && g->bar2) | 415 | if (g->bar2) |
406 | pci_iounmap(g->pcid, g->bar2); | 416 | pci_iounmap(g->pcid, g->bar2); |
407 | #if INTERRUPT_DEBUG | 417 | #if INTERRUPT_DEBUG |
408 | free_irq(g->pcid->irq, g->pcid); | 418 | free_irq(g->pcid->irq, g->pcid); |