1 files changed, 13 insertions, 6 deletions
diff --git a/nvdebug_linux.c b/nvdebug_linux.c
index 111d5aa..e673a8b 100644
--- a/nvdebug_linux.c
+++ b/nvdebug_linux.c
@@ -1,4 +1,4 @@
-/* Copyright 2024 Joshua Bakita
+/* Copyright 2024-2025 Joshua Bakita
 * Implementation of Kernel-specific function implementations
 */
 #include "nvdebug_linux.h"
@@ -9,10 +9,14 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
        u32 ret;
        // If this is an integrated ("platform") GPU, make sure that it's on first
        // (pm_runtime_enabled() will return false until nvgpu is started. Once
-        // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.)
+        // nvgpu is started, pm_runtime_get_sync() will attempt to resume the GPU.
+        // That call increments the usage counter even on failure, so we undo it
+        // with pm_runtime_put_noidle(). We avoid pm_runtime_resume_and_get() as it
+        // was not added until Linux 5.9.11.)
        // This works to bring up the TX2, Xavier, and Orin, but not the TX1.
-        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
+                pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled()
                return -1;
        }
        ret = readl(s->regs + r);
@@ -39,8 +43,9 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
 u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
        u64 ret;
        // If this is an integrated ("platform") GPU, make sure that it's on first
-        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
+                pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled()
                return -1;
        }
        // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl
@@ -59,8 +64,9 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
 void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
        // If this is an integrated ("platform") GPU, make sure that it's on first
-        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
+                pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled()
                return;
        }
        writel_relaxed(v, s->regs + r);
@@ -74,8 +80,9 @@ void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
 // XXX: Not clear this works on all platforms
 void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
        // If this is an integrated ("platform") GPU, make sure that it's on first
-        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
+        if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) {
                printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
+                pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled()
                return;
        }
        writeq_relaxed(v, s->regs + r);