diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-25 15:58:37 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-25 15:58:37 -0400 |
commit | 8340d234d78a7d0f46c11a584de538148b78b7cb (patch) | |
tree | 4837d0debd711fbe7a5cd292c2606b8c1e5d50aa /nvdebug_entry.c | |
parent | e2fe4cb56e6252b9cf0b43c6180efbb20a168ce0 (diff) |
Remove dependency on Jetson (nvgpu) driver internals
For integrated (Jetson) GPUs:
- Directly retrieve and map GPU register region 0
- Directly check GPU power-on state before a register read/write
- Resume the GPU as needed for a register read/write
Most nvgpu APIs can now be called on TX2+ integrated GPUs without
first having to start some task on the GPU to make it non-suspended.
Tested on Jetson TX1, TX2, Xavier, and Orin.
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 44 |
1 files changed, 33 insertions, 11 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index d5df7db..3a10e13 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/pci.h> // For PCI device scanning | 9 | #include <linux/pci.h> // For PCI device scanning |
10 | #include <linux/platform_device.h> // For platform_device struct | ||
10 | #include <linux/proc_fs.h> // So we can set up entries in /proc | 11 | #include <linux/proc_fs.h> // So we can set up entries in /proc |
11 | 12 | ||
12 | #include "nvdebug_linux.h" | 13 | #include "nvdebug_linux.h" |
@@ -114,17 +115,33 @@ int probe_and_cache_devices(void) { | |||
114 | // TODO: Support other platform bus devices (gk20a - TK1) | 115 | // TODO: Support other platform bus devices (gk20a - TK1) |
115 | if (dev) { | 116 | if (dev) { |
116 | mc_boot_0_t ids; | 117 | mc_boot_0_t ids; |
118 | struct platform_device *platd = container_of(dev, struct platform_device, dev); | ||
119 | struct resource *regs = platform_get_resource(platd, IORESOURCE_MEM, 0); | ||
117 | g_nvdebug_state[i].g = get_gk20a(dev); | 120 | g_nvdebug_state[i].g = get_gk20a(dev); |
118 | g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g); | 121 | if (!regs) |
119 | if (!g_nvdebug_state[i].regs) | ||
120 | return -EADDRNOTAVAIL; | 122 | return -EADDRNOTAVAIL; |
121 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | 123 | g_nvdebug_state[i].regs = ioremap(regs->start, resource_size(regs)); |
122 | if (ids.raw == -1) | 124 | if (!g_nvdebug_state[i].regs) { |
125 | printk(KERN_ERR "[nvdebug] Unable to map BAR0 on the integrated GPU\n"); | ||
123 | return -EADDRNOTAVAIL; | 126 | return -EADDRNOTAVAIL; |
124 | g_nvdebug_state[i].chip_id = ids.chip_id; | 127 | } |
128 | // The Jetson TX1, TX2, Xavier, and Orin do not have a BAR2 (but do have | ||
129 | // BAR1). On the TX2+, all their platform resources are: | ||
130 | // [nvdebug] Region 0: Memory at 17000000 [size=16777216] | ||
131 | // [nvdebug] Region 1: Memory at 18000000 [size=16777216] | ||
132 | // [nvdebug] Region 2: Memory at 3b41000 [size=4096] | ||
133 | // The TX1 has the same regions, but at different base addresses. | ||
125 | g_nvdebug_state[i].bar3 = NULL; | 134 | g_nvdebug_state[i].bar3 = NULL; |
126 | g_nvdebug_state[i].pcid = NULL; | 135 | g_nvdebug_state[i].pcid = NULL; |
136 | g_nvdebug_state[i].platd = platd; | ||
127 | g_nvdebug_state[i].dev = dev; | 137 | g_nvdebug_state[i].dev = dev; |
138 | // Don't check Chip ID until everything else is initialized | ||
139 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
140 | if (ids.raw == -1) { | ||
141 | printk(KERN_ERR "[nvdebug] Unable to read config from Master Controller on the integrated GPU\n"); | ||
142 | return -EADDRNOTAVAIL; | ||
143 | } | ||
144 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
128 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", | 145 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", |
129 | ids.chip_id, ARCH2NAME(ids.architecture)); | 146 | ids.chip_id, ARCH2NAME(ids.architecture)); |
130 | i++; | 147 | i++; |
@@ -140,12 +157,6 @@ int probe_and_cache_devices(void) { | |||
140 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); | 157 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); |
141 | return -EADDRNOTAVAIL; | 158 | return -EADDRNOTAVAIL; |
142 | } | 159 | } |
143 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
144 | if (ids.raw == -1) { | ||
145 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
146 | return -EADDRNOTAVAIL; | ||
147 | } | ||
148 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
149 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) | 160 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) |
150 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); | 161 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); |
151 | // XXX: Try mapping only the lower half of BAR3 on fail | 162 | // XXX: Try mapping only the lower half of BAR3 on fail |
@@ -153,7 +164,15 @@ int probe_and_cache_devices(void) { | |||
153 | if (!g_nvdebug_state[i].bar3) | 164 | if (!g_nvdebug_state[i].bar3) |
154 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | 165 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); |
155 | g_nvdebug_state[i].pcid = pcid; | 166 | g_nvdebug_state[i].pcid = pcid; |
167 | g_nvdebug_state[i].platd = NULL; | ||
156 | g_nvdebug_state[i].dev = &pcid->dev; | 168 | g_nvdebug_state[i].dev = &pcid->dev; |
169 | // Don't check Chip ID until everything else is initialized | ||
170 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
171 | if (ids.raw == -1) { | ||
172 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
173 | return -EADDRNOTAVAIL; | ||
174 | } | ||
175 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
157 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", | 176 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", |
158 | ids.chip_id, ARCH2NAME(ids.architecture)); | 177 | ids.chip_id, ARCH2NAME(ids.architecture)); |
159 | #if INTERRUPT_DEBUG | 178 | #if INTERRUPT_DEBUG |
@@ -430,6 +449,9 @@ static void __exit nvdebug_exit(void) { | |||
430 | #if INTERRUPT_DEBUG | 449 | #if INTERRUPT_DEBUG |
431 | free_irq(g->pcid->irq, g->pcid); | 450 | free_irq(g->pcid->irq, g->pcid); |
432 | #endif // INTERRUPT_DEBUG | 451 | #endif // INTERRUPT_DEBUG |
452 | } else { | ||
453 | if (g->regs) | ||
454 | iounmap(g->regs); | ||
433 | } | 455 | } |
434 | printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id); | 456 | printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id); |
435 | } | 457 | } |