Remove dependency on Jetson (nvgpu) driver internals

For integrated (Jetson) GPUs:
- Directly retrieve and map GPU register region 0
- Directly check GPU power-on state before a register read/write
- Resume the GPU as needed for a register read/write

Most nvgpu APIs can now be called on TX2+ integrated GPUs without first having to start some task on the GPU to make it non-suspended.

Tested on Jetson TX1, TX2, Xavier, and Orin.
author: Joshua Bakita <bakitajoshua@gmail.com> 2024-09-25 15:58:37 -0400
committer: Joshua Bakita <bakitajoshua@gmail.com> 2024-09-25 15:58:37 -0400
commit: 8340d234d78a7d0f46c11a584de538148b78b7cb (patch)
tree: 4837d0debd711fbe7a5cd292c2606b8c1e5d50aa
parent: e2fe4cb56e6252b9cf0b43c6180efbb20a168ce0 (diff)
5 files changed, 61 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index 972bc0b..fea3819 100644
--- a/Makefile
+++ b/Makefile
@@ -4,13 +4,6 @@ nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \
 KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
 # -mfentry above if not building due to mcount missing
-# TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...)
-ccflags-y += -I$(PWD)/include
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi
 all:
        make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
 clean:
diff --git a/nvdebug.h b/nvdebug.h
index 80f1a74..ca0f514 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -1470,6 +1470,8 @@ struct nvdebug_state {
        struct gk20a *g;
        // Pointer to PCI device needed for pci_iounmap and pci_resource_start
        struct pci_dev *pcid;
+        // Pointer to platform device needed for platform_get_resource
+        struct platform_device *platd;
        // Pointer to generic device struct (both platform and pcie devices)
        struct device *dev;
 };
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index d5df7db..3a10e13 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>  // For PCI device scanning
+#include <linux/platform_device.h>  // For platform_device struct
 #include <linux/proc_fs.h>  // So we can set up entries in /proc
 #include "nvdebug_linux.h"
@@ -114,17 +115,33 @@ int probe_and_cache_devices(void) {
        // TODO: Support other platform bus devices (gk20a - TK1)
        if (dev) {
                mc_boot_0_t ids;
+                struct platform_device *platd = container_of(dev, struct platform_device, dev);
+                struct resource *regs = platform_get_resource(platd, IORESOURCE_MEM, 0);
                g_nvdebug_state[i].g = get_gk20a(dev);
-                g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g);
+                if (!regs)
-                if (!g_nvdebug_state[i].regs)
                        return -EADDRNOTAVAIL;
-                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
+                g_nvdebug_state[i].regs = ioremap(regs->start, resource_size(regs));
-                if (ids.raw == -1)
+                if (!g_nvdebug_state[i].regs) {
+                        printk(KERN_ERR "[nvdebug] Unable to map BAR0 on the integrated GPU\n");
                        return -EADDRNOTAVAIL;
-                g_nvdebug_state[i].chip_id = ids.chip_id;
+                }
+                // The Jetson TX1, TX2, Xavier, and Orin do not have a BAR2 (but do have
+                // BAR1). On the TX2+, all their platform resources are:
+                //   [nvdebug] Region 0: Memory at 17000000 [size=16777216]
+                //   [nvdebug] Region 1: Memory at 18000000 [size=16777216]
+                //   [nvdebug] Region 2: Memory at 3b41000 [size=4096]
+                // The TX1 has the same regions, but at different base addresses.
                g_nvdebug_state[i].bar3 = NULL;
                g_nvdebug_state[i].pcid = NULL;
+                g_nvdebug_state[i].platd = platd;
                g_nvdebug_state[i].dev = dev;
+                // Don't check Chip ID until everything else is initialized
+                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
+                if (ids.raw == -1) {
+                        printk(KERN_ERR "[nvdebug] Unable to read config from Master Controller on the integrated GPU\n");
+                        return -EADDRNOTAVAIL;
+                }
+                g_nvdebug_state[i].chip_id = ids.chip_id;
                printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.",
                       ids.chip_id, ARCH2NAME(ids.architecture));
                i++;
@@ -140,12 +157,6 @@ int probe_and_cache_devices(void) {
                        pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n");
                        return -EADDRNOTAVAIL;
                }
-                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
-                if (ids.raw == -1) {
-                        pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
-                        return -EADDRNOTAVAIL;
-                }
-                g_nvdebug_state[i].chip_id = ids.chip_id;
                // Map BAR3 (CPU-accessible mappings of GPU DRAM)
                g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0);
                // XXX: Try mapping only the lower half of BAR3 on fail
@@ -153,7 +164,15 @@ int probe_and_cache_devices(void) {
                if (!g_nvdebug_state[i].bar3)
                        g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2);
                g_nvdebug_state[i].pcid = pcid;
+                g_nvdebug_state[i].platd = NULL;
                g_nvdebug_state[i].dev = &pcid->dev;
+                // Don't check Chip ID until everything else is initialized
+                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
+                if (ids.raw == -1) {
+                        pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
+                        return -EADDRNOTAVAIL;
+                }
+                g_nvdebug_state[i].chip_id = ids.chip_id;
                printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.",
                       ids.chip_id, ARCH2NAME(ids.architecture));
 #if INTERRUPT_DEBUG
@@ -430,6 +449,9 @@ static void __exit nvdebug_exit(void) {
 #if INTERRUPT_DEBUG
                        free_irq(g->pcid->irq, g->pcid);
 #endif // INTERRUPT_DEBUG
+                } else {
+                        if (g->regs)
+                                iounmap(g->regs);
                }
                printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id);
        }
diff --git a/nvdebug_linux.c b/nvdebug_linux.c
index 830ec6e..111d5aa 100644
--- a/nvdebug_linux.c
+++ b/nvdebug_linux.c
@@ -3,16 +3,22 @@
 */
 #include "nvdebug_linux.h"
 #include <asm/io.h> // For read[l,q] and write[l,q]
+#include <linux/pm_runtime.h> // For pm_runtime_[enabled,get,put]()
-// Similar to nvgpu_readl()
-// (except we don't try to resolve situations where regs is NULL)
 u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        u32 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        // (pm_runtime_enabled() will return false until nvgpu is started. Once
+        // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.)
+        // This works to bring up the TX2, Xavier, and Orin, but not the TX1.
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
                return -1;
        }
        ret = readl(s->regs + r);
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
        // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the
        // upper 3 nibbles when there is a possible issue". Further code uses the
        // middle three nibbles as an error code, and ignores the bottom two.
@@ -29,16 +35,20 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        return ret;
 }
-// quadword version of nvdebug_readl()
+// quadword (8-byte) version of nvdebug_readl()
 u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        u64 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
                return -1;
        }
        // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl
        ret = readl(s->regs + r);
        ret |= ((u64)readl(s->regs + r + 4)) << 32;
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
        // See comment in nvdebug_readl() regarding error checking
        if ((ret & 0xfff00000ull) == 0xbad00000ull) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret);
@@ -47,23 +57,30 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        return ret;
 }
-// Similar to nvgpu_writel()
 void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU o
author	Joshua Bakita <bakitajoshua@gmail.com>	2024-09-25 15:58:37 -0400
committer	Joshua Bakita <bakitajoshua@gmail.com>	2024-09-25 15:58:37 -0400
commit	8340d234d78a7d0f46c11a584de538148b78b7cb (patch)
tree	4837d0debd711fbe7a5cd292c2606b8c1e5d50aa
parent	e2fe4cb56e6252b9cf0b43c6180efbb20a168ce0 (diff)