diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2025-04-04 10:29:54 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2025-04-04 10:29:54 -0400 |
commit | 494df296bf4abe9b2b484bde1a4fad28c989afec (patch) | |
tree | 123a4b696545e70a9953ada907ed00dfccb6038a | |
parent | 6143114460e5125621747cde2f712fed445b9a15 (diff) |
When the GPU is powered off, attempts to read any of its registers
(such as via nvdebug_readl()) result in a fatal interrupt. The
pm_runtime_get() call included in nvdebug sent a request to nvgpu
to turn the GPU back on. **However,** this call did not wait for
the power-on command to take effect. This resulted in a race between
nvdebug and the power management logic, meaning that the GPU may not
have powered-on by the time that nvdebug attempted to read its
registers.
Use pm_runtime_get_sync() instead, which explicitly waits for the
power-on command to complete (or fail) before returning. This
eliminates the race condition.
Thank you to Diego Alejandro Parra Guzman
<diego.guzman@tttech-auto.com>, who brought this issue to my
attention.
-rw-r--r-- | nvdebug_linux.c | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/nvdebug_linux.c b/nvdebug_linux.c index 111d5aa..e673a8b 100644 --- a/nvdebug_linux.c +++ b/nvdebug_linux.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* Copyright 2024 Joshua Bakita | 1 | /* Copyright 2024-2025 Joshua Bakita |
2 | * Implementation of Kernel-specific function implementations | 2 | * Implementation of Kernel-specific function implementations |
3 | */ | 3 | */ |
4 | #include "nvdebug_linux.h" | 4 | #include "nvdebug_linux.h" |
@@ -9,10 +9,14 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | |||
9 | u32 ret; | 9 | u32 ret; |
10 | // If this is an integrated ("platform") GPU, make sure that it's on first | 10 | // If this is an integrated ("platform") GPU, make sure that it's on first |
11 | // (pm_runtime_enabled() will return false until nvgpu is started. Once | 11 | // (pm_runtime_enabled() will return false until nvgpu is started. Once |
12 | // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.) | 12 | // nvgpu is started, pm_runtime_get_sync() will attempt to resume the GPU. |
13 | // This still increments the usage counter on failure, so we undo that with | ||
14 | // pm_runtime_put_noidle(). We avoid pm_runtime_resume_and_get() as it was | ||
15 | // not added until Linux 5.9.11) | ||
13 | // This works to bring up the TX2, Xavier, and Orin, but not the TX1. | 16 | // This works to bring up the TX2, Xavier, and Orin, but not the TX1. |
14 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | 17 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) { |
15 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); | 18 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); |
19 | pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled() | ||
16 | return -1; | 20 | return -1; |
17 | } | 21 | } |
18 | ret = readl(s->regs + r); | 22 | ret = readl(s->regs + r); |
@@ -39,8 +43,9 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | |||
39 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | 43 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { |
40 | u64 ret; | 44 | u64 ret; |
41 | // If this is an integrated ("platform") GPU, make sure that it's on first | 45 | // If this is an integrated ("platform") GPU, make sure that it's on first |
42 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | 46 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) { |
43 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | 47 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); |
48 | pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled() | ||
44 | return -1; | 49 | return -1; |
45 | } | 50 | } |
46 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl | 51 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl |
@@ -59,8 +64,9 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | |||
59 | 64 | ||
60 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | 65 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { |
61 | // If this is an integrated ("platform") GPU, make sure that it's on first | 66 | // If this is an integrated ("platform") GPU, make sure that it's on first |
62 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | 67 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) { |
63 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); | 68 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); |
69 | pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled() | ||
64 | return; | 70 | return; |
65 | } | 71 | } |
66 | writel_relaxed(v, s->regs + r); | 72 | writel_relaxed(v, s->regs + r); |
@@ -74,8 +80,9 @@ void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | |||
74 | // XXX: Not clear this works on all platforms | 80 | // XXX: Not clear this works on all platforms |
75 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | 81 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { |
76 | // If this is an integrated ("platform") GPU, make sure that it's on first | 82 | // If this is an integrated ("platform") GPU, make sure that it's on first |
77 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | 83 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get_sync(s->dev) < 0)) { |
78 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | 84 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); |
85 | pm_runtime_put_noidle(s->dev); // No-op if !pm_runtime_enabled() | ||
79 | return; | 86 | return; |
80 | } | 87 | } |
81 | writeq_relaxed(v, s->regs + r); | 88 | writeq_relaxed(v, s->regs + r); |