aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
author Joshua Bakita <bakitajoshua@gmail.com> 2024-09-16 15:53:23 -0400
committer Joshua Bakita <bakitajoshua@gmail.com> 2024-09-16 15:57:36 -0400
commit ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (patch)
tree 0f73a6a9f3fd15e2286fd7790804c2a06c7cba57 /nvdebug_entry.c
parent 232eafd04f272ed69d97a250c50a7bbed4d2894c (diff)
Cleanup in nvdebug_entry.c
- Fix pointer corruption when `compat_ops()` is called more than once on the same struct.
- Add support for detecting Jetson Orin on newer releases of L4T.
- Reorder some initialization steps such that the order matches for both PCIe- and platform-bus devices.
- Remove a duplicate check in `nvdebug_exit()`.

Tested on ga10b (Jetson Orin) with L4T r36.3.
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- nvdebug_entry.c 36
1 file changed, 23 insertions, 13 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 1f9e1c9..5f99976 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -25,9 +25,9 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
25// runlist_procfs.c 25// runlist_procfs.c
26extern struct file_operations runlist_file_ops; 26extern struct file_operations runlist_file_ops;
27extern struct file_operations preempt_tsg_file_ops; 27extern struct file_operations preempt_tsg_file_ops;
28extern struct file_operations resubmit_runlist_file_ops;
29extern struct file_operations disable_channel_file_ops; 28extern struct file_operations disable_channel_file_ops;
30extern struct file_operations enable_channel_file_ops; 29extern struct file_operations enable_channel_file_ops;
30extern struct file_operations resubmit_runlist_file_ops;
31extern struct file_operations switch_to_tsg_file_ops; 31extern struct file_operations switch_to_tsg_file_ops;
32// device_info_procfs.c 32// device_info_procfs.c
33extern struct file_operations device_info_file_ops; 33extern struct file_operations device_info_file_ops;
@@ -46,15 +46,22 @@ extern struct bus_type platform_bus_type;
46// As file_operations is larger than proc_ops, we can overwrite the memory 46// As file_operations is larger than proc_ops, we can overwrite the memory
47// backing the file_operations struct to follow the proc_ops layout, and then 47// backing the file_operations struct to follow the proc_ops layout, and then
48// cast on newer kernels. 48// cast on newer kernels.
49// We use the last byte of the file_operations struct to flag that the memory
50// layout has been rearranged.
49#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) 51#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
50const struct proc_ops* compat_ops(const struct file_operations* ops) { 52const struct proc_ops* compat_ops(const struct file_operations* ops) {
51 struct proc_ops new_ops = {}; 53 struct proc_ops new_ops = {};
54 // Don't re-layout if it's already been done
55 if (*((uint8_t*)(ops + 1) - 1))
56 return (struct proc_ops*)ops;
52 new_ops.proc_open = ops->open; 57 new_ops.proc_open = ops->open;
53 new_ops.proc_read = ops->read; 58 new_ops.proc_read = ops->read;
54 new_ops.proc_write = ops->write; 59 new_ops.proc_write = ops->write;
55 new_ops.proc_lseek = ops->llseek; 60 new_ops.proc_lseek = ops->llseek;
56 new_ops.proc_release = ops->release; 61 new_ops.proc_release = ops->release;
57 memcpy((void*)ops, &new_ops, sizeof(new_ops)); 62 memcpy((void*)ops, &new_ops, sizeof(new_ops));
63 // Flag re-layout as complete in last byte of structure
64 *((uint8_t*)(ops + 1) - 1) = 1;
58 return (struct proc_ops*)ops; 65 return (struct proc_ops*)ops;
59} 66}
60#else 67#else
@@ -89,6 +96,9 @@ int probe_and_cache_devices(void) {
89 }; 96 };
90 int i = 0; 97 int i = 0;
91 // Search the platform bus for the first device that matches our name 98 // Search the platform bus for the first device that matches our name
99 // Search for embedded GPU on Jetson (generic name starting around L4T 36.3)
100 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gpu")))
101 dev = temp_dev;
92 // Search for GA10B (Jetson Orin) 102 // Search for GA10B (Jetson Orin)
93 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) 103 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b")))
94 dev = temp_dev; 104 dev = temp_dev;
@@ -112,15 +122,15 @@ int probe_and_cache_devices(void) {
112 if (ids.raw == -1) 122 if (ids.raw == -1)
113 return -EADDRNOTAVAIL; 123 return -EADDRNOTAVAIL;
114 g_nvdebug_state[i].chip_id = ids.chip_id; 124 g_nvdebug_state[i].chip_id = ids.chip_id;
115 g_nvdebug_state[i].pcid = NULL;
116 g_nvdebug_state[i].bar3 = NULL; 125 g_nvdebug_state[i].bar3 = NULL;
126 g_nvdebug_state[i].pcid = NULL;
117 g_nvdebug_state[i].dev = dev; 127 g_nvdebug_state[i].dev = dev;
118 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", 128 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.",
119 ids.chip_id, ARCH2NAME(ids.architecture)); 129 ids.chip_id, ARCH2NAME(ids.architecture));
120 i++; 130 i++;
121 } 131 }
122 // Search the PCI bus and iterate through all matches 132 // Search the PCI bus and iterate through all matches
123 // FIXME: State rollback 133 // FIXME: Undo the pci_iomap() if this fails
124 while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { 134 while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) {
125 mc_boot_0_t ids; 135 mc_boot_0_t ids;
126 g_nvdebug_state[i].g = NULL; 136 g_nvdebug_state[i].g = NULL;
@@ -130,20 +140,20 @@ int probe_and_cache_devices(void) {
130 pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); 140 pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n");
131 return -EADDRNOTAVAIL; 141 return -EADDRNOTAVAIL;
132 } 142 }
143 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
144 if (ids.raw == -1) {
145 pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
146 return -EADDRNOTAVAIL;
147 }
148 g_nvdebug_state[i].chip_id = ids.chip_id;
133 // Map BAR3 (CPU-accessible mappings of GPU DRAM) 149 // Map BAR3 (CPU-accessible mappings of GPU DRAM)
134 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); 150 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0);
135 // Try mapping only the lower half of BAR3 on fail 151 // XXX: Try mapping only the lower half of BAR3 on fail
136 // (vesafb may map the top half for display) 152 // (vesafb may map the top half for display)
137 if (!g_nvdebug_state[i].bar3) 153 if (!g_nvdebug_state[i].bar3)
138 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); 154 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2);
139 g_nvdebug_state[i].pcid = pcid; 155 g_nvdebug_state[i].pcid = pcid;
140 g_nvdebug_state[i].dev = &pcid->dev; 156 g_nvdebug_state[i].dev = &pcid->dev;
141 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
142 if (ids.raw == -1) {
143 pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
144 return -EADDRNOTAVAIL;
145 }
146 g_nvdebug_state[i].chip_id = ids.chip_id;
147 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", 157 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.",
148 ids.chip_id, ARCH2NAME(ids.architecture)); 158 ids.chip_id, ARCH2NAME(ids.architecture));
149#if INTERRUPT_DEBUG 159#if INTERRUPT_DEBUG
@@ -397,12 +407,12 @@ static void __exit nvdebug_exit(void) {
397 char device_id[7]; 407 char device_id[7];
398 snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); 408 snprintf(device_id, 7, "gpu%d", g_nvdebug_devices);
399 remove_proc_subtree(device_id, NULL); 409 remove_proc_subtree(device_id, NULL);
400 // Free BAR mappings for PCIe devices
401 g = &g_nvdebug_state[g_nvdebug_devices]; 410 g = &g_nvdebug_state[g_nvdebug_devices];
411 // Free BAR mappings for PCIe devices
402 if (g && g->pcid) { 412 if (g && g->pcid) {
403 if (g && g->regs) 413 if (g->regs)
404 pci_iounmap(g->pcid, g->regs); 414 pci_iounmap(g->pcid, g->regs);
405 if (g && g->bar2) 415 if (g->bar2)
406 pci_iounmap(g->pcid, g->bar2); 416 pci_iounmap(g->pcid, g->bar2);
407#if INTERRUPT_DEBUG 417#if INTERRUPT_DEBUG
408 free_irq(g->pcid->irq, g->pcid); 418 free_irq(g->pcid->irq, g->pcid);