5 files changed, 61 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index 972bc0b..fea3819 100644
--- a/Makefile
+++ b/Makefile
@@ -4,13 +4,6 @@ nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \
 KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
 # -mfentry above if not building due to mcount missing
-# TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...)
-ccflags-y += -I$(PWD)/include
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include
-#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi
 all:
        make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
 clean:
diff --git a/nvdebug.h b/nvdebug.h
index 80f1a74..ca0f514 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -1470,6 +1470,8 @@ struct nvdebug_state {
        struct gk20a *g;
        // Pointer to PCI device needed for pci_iounmap and pci_resource_start
        struct pci_dev *pcid;
+        // Pointer to platform device needed for platform_get_resource
+        struct platform_device *platd;
        // Pointer to generic device struct (both platform and pcie devices)
        struct device *dev;
 };
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index d5df7db..3a10e13 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>  // For PCI device scanning
+#include <linux/platform_device.h>  // For platform_device struct
 #include <linux/proc_fs.h>  // So we can set up entries in /proc
 #include "nvdebug_linux.h"
@@ -114,17 +115,33 @@ int probe_and_cache_devices(void) {
        // TODO: Support other platform bus devices (gk20a - TK1)
        if (dev) {
                mc_boot_0_t ids;
+                struct platform_device *platd = container_of(dev, struct platform_device, dev);
+                struct resource *regs = platform_get_resource(platd, IORESOURCE_MEM, 0);
                g_nvdebug_state[i].g = get_gk20a(dev);
-                g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g);
+                if (!regs)
-                if (!g_nvdebug_state[i].regs)
                        return -EADDRNOTAVAIL;
-                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
+                g_nvdebug_state[i].regs = ioremap(regs->start, resource_size(regs));
-                if (ids.raw == -1)
+                if (!g_nvdebug_state[i].regs) {
+                        printk(KERN_ERR "[nvdebug] Unable to map BAR0 on the integrated GPU\n");
                        return -EADDRNOTAVAIL;
-                g_nvdebug_state[i].chip_id = ids.chip_id;
+                }
+                // The Jetson TX1, TX2, Xavier, and Orin do not have a BAR2 (but do have
+                // BAR1). On the TX2+, all their platform resources are:
+                //   [nvdebug] Region 0: Memory at 17000000 [size=16777216]
+                //   [nvdebug] Region 1: Memory at 18000000 [size=16777216]
+                //   [nvdebug] Region 2: Memory at 3b41000 [size=4096]
+                // The TX1 has the same regions, but at different base addresses.
                g_nvdebug_state[i].bar3 = NULL;
                g_nvdebug_state[i].pcid = NULL;
+                g_nvdebug_state[i].platd = platd;
                g_nvdebug_state[i].dev = dev;
+                // Don't check Chip ID until everything else is initialized
+                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
+                if (ids.raw == -1) {
+                        printk(KERN_ERR "[nvdebug] Unable to read config from Master Controller on the integrated GPU\n");
+                        return -EADDRNOTAVAIL;
+                }
+                g_nvdebug_state[i].chip_id = ids.chip_id;
                printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.",
                       ids.chip_id, ARCH2NAME(ids.architecture));
                i++;
@@ -140,12 +157,6 @@ int probe_and_cache_devices(void) {
                        pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n");
                        return -EADDRNOTAVAIL;
                }
-                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
-                if (ids.raw == -1) {
-                        pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
-                        return -EADDRNOTAVAIL;
-                }
-                g_nvdebug_state[i].chip_id = ids.chip_id;
                // Map BAR3 (CPU-accessible mappings of GPU DRAM)
                g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0);
                // XXX: Try mapping only the lower half of BAR3 on fail
@@ -153,7 +164,15 @@ int probe_and_cache_devices(void) {
                if (!g_nvdebug_state[i].bar3)
                        g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2);
                g_nvdebug_state[i].pcid = pcid;
+                g_nvdebug_state[i].platd = NULL;
                g_nvdebug_state[i].dev = &pcid->dev;
+                // Don't check Chip ID until everything else is initialized
+                ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
+                if (ids.raw == -1) {
+                        pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
+                        return -EADDRNOTAVAIL;
+                }
+                g_nvdebug_state[i].chip_id = ids.chip_id;
                printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.",
                       ids.chip_id, ARCH2NAME(ids.architecture));
 #if INTERRUPT_DEBUG
@@ -430,6 +449,9 @@ static void __exit nvdebug_exit(void) {
 #if INTERRUPT_DEBUG
                        free_irq(g->pcid->irq, g->pcid);
 #endif // INTERRUPT_DEBUG
+                } else {
+                        if (g->regs)
+                                iounmap(g->regs);
                }
                printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id);
        }
diff --git a/nvdebug_linux.c b/nvdebug_linux.c
index 830ec6e..111d5aa 100644
--- a/nvdebug_linux.c
+++ b/nvdebug_linux.c
@@ -3,16 +3,22 @@
 */
 #include "nvdebug_linux.h"
 #include <asm/io.h> // For read[l,q] and write[l,q]
+#include <linux/pm_runtime.h> // For pm_runtime_[enabled,get,put]()
-// Similar to nvgpu_readl()
-// (except we don't try to resolve situations where regs is NULL)
 u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        u32 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        // (pm_runtime_enabled() will return false until nvgpu is started. Once
+        // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.)
+        // This works to bring up the TX2, Xavier, and Orin, but not the TX1.
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
                return -1;
        }
        ret = readl(s->regs + r);
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
        // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the
        // upper 3 nibbles when there is a possible issue". Further code uses the
        // middle three nibbles as an error code, and ignores the bottom two.
@@ -29,16 +35,20 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        return ret;
 }
-// quadword version of nvdebug_readl()
+// quadword (8-byte) version of nvdebug_readl()
 u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        u64 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
                return -1;
        }
        // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl
        ret = readl(s->regs + r);
        ret |= ((u64)readl(s->regs + r + 4)) << 32;
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
        // See comment in nvdebug_readl() regarding error checking
        if ((ret & 0xfff00000ull) == 0xbad00000ull) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret);
@@ -47,23 +57,30 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        return ret;
 }
-// Similar to nvgpu_writel()
 void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
                return;
        }
        writel_relaxed(v, s->regs + r);
        wmb();
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
 }
-// quadword version of nvdebug_writel()
+// quadword (8-byte) version of nvdebug_writel()
 // XXX: Not clear this works on all platforms
 void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
                return;
        }
        writeq_relaxed(v, s->regs + r);
        wmb();
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
 }
diff --git a/nvdebug_linux.h b/nvdebug_linux.h
index 022d1cf..2ad4ce1 100644
--- a/nvdebug_linux.h
+++ b/nvdebug_linux.h
@@ -35,15 +35,3 @@ static inline int file2parentgpuidx(const struct file *f) {
        // module.
        return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode);
 }
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
-// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
-// of the gk20a struct (after a function pointer). This change was made as L4T
-// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
-// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
-// i.e. if FUSA was enabled, this is wrong.
-#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void))))
-#else
-#include <os/linux/os_linux.h>  // For struct nvgpu_os_linux, which holds regs
-#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
-#endif