1 files changed, 26 insertions, 9 deletions
diff --git a/nvdebug_linux.c b/nvdebug_linux.c
index 830ec6e..111d5aa 100644
--- a/nvdebug_linux.c
+++ b/nvdebug_linux.c
@@ -3,16 +3,22 @@
 */
 #include "nvdebug_linux.h"
 #include <asm/io.h> // For read[l,q] and write[l,q]
+#include <linux/pm_runtime.h> // For pm_runtime_[enabled,get,put]()
-// Similar to nvgpu_readl()
-// (except we don't try to resolve situations where regs is NULL)
 u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        u32 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, make sure that it's on first
+        // (pm_runtime_enabled() will return false until nvgpu is started. Once nvgpu is started, pm_runtime_get_sync() resumes the GPU before returning.)
+        // With the asynchronous pm_runtime_get(), this worked to bring up the TX2, Xavier, and Orin, but not the TX1.
+        // XXX: A failed pm_runtime_get_sync() keeps its usage count; pm_runtime_put_noidle() would rebalance it
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
                return -1;
        }
        ret = readl(s->regs + r);
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
        // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the
        // upper 3 nibbles when there is a possible issue". Further code uses the
        // middle three nibbles as an error code, and ignores the bottom two.
@@ -29,16 +35,20 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        return ret;
 }
-// quadword version of nvdebug_readl()
+// quadword (8-byte) version of nvdebug_readl()
 u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        u64 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, wait for it to be on first (XXX: a failed pm_runtime_get_sync() keeps its usage count)
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
                return -1;
        }
        // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl
        ret = readl(s->regs + r);
        ret |= ((u64)readl(s->regs + r + 4)) << 32;
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
        // See comment in nvdebug_readl() regarding error checking
        if ((ret & 0xfff00000ull) == 0xbad00000ull) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret);
@@ -47,23 +57,30 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        return ret;
 }
-// Similar to nvgpu_writel()
 void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, wait for it to be on first (XXX: a failed pm_runtime_get_sync() keeps its usage count)
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
                return;
        }
        writel_relaxed(v, s->regs + r);
        wmb();
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
 }
-// quadword version of nvdebug_writel()
+// quadword (8-byte) version of nvdebug_writel()
 // XXX: Not clear this works on all platforms
 void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+        // If this is an integrated ("platform") GPU, wait for it to be on first (XXX: a failed pm_runtime_get_sync() keeps its usage count)
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
                return;
        }
        writeq_relaxed(v, s->regs + r);
        wmb();
+        // If an integrated GPU, allow it to suspend again (if idle)
+        if (s->platd)
+                pm_runtime_put(s->dev);
 }