diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-25 15:58:37 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-25 15:58:37 -0400 |
commit | 8340d234d78a7d0f46c11a584de538148b78b7cb (patch) | |
tree | 4837d0debd711fbe7a5cd292c2606b8c1e5d50aa /nvdebug_linux.c | |
parent | e2fe4cb56e6252b9cf0b43c6180efbb20a168ce0 (diff) |
Remove dependency on Jetson (nvgpu) driver internals
For integrated (Jetson) GPUs:
- Directly retrieve and map GPU register region 0
- Directly check GPU power-on state before a register read/write
- Resume the GPU as needed for a register read/write
Most nvgpu APIs can now be called on TX2+ integrated GPUs without
first having to start some task on the GPU to make it non-suspended.
Tested on Jetson TX1, TX2, Xavier, and Orin.
Diffstat (limited to 'nvdebug_linux.c')
-rw-r--r-- | nvdebug_linux.c | 35 |
1 file changed, 26 insertions, 9 deletions
diff --git a/nvdebug_linux.c b/nvdebug_linux.c index 830ec6e..111d5aa 100644 --- a/nvdebug_linux.c +++ b/nvdebug_linux.c | |||
@@ -3,16 +3,22 @@ | |||
3 | */ | 3 | */ |
4 | #include "nvdebug_linux.h" | 4 | #include "nvdebug_linux.h" |
5 | #include <asm/io.h> // For read[l,q] and write[l,q] | 5 | #include <asm/io.h> // For read[l,q] and write[l,q] |
6 | #include <linux/pm_runtime.h> // For pm_runtime_[enabled,get,put]() | ||
6 | 7 | ||
7 | // Similar to nvgpu_readl() | ||
8 | // (except we don't try to resolve situations where regs is NULL) | ||
9 | u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | 8 | u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { |
10 | u32 ret; | 9 | u32 ret; |
11 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 10 | // If this is an integrated ("platform") GPU, make sure that it's on first |
11 | // (pm_runtime_enabled() will return false until nvgpu is started. Once | ||
12 | // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.) | ||
13 | // This works to bring up the TX2, Xavier, and Orin, but not the TX1. | ||
14 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
12 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); | 15 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); |
13 | return -1; | 16 | return -1; |
14 | } | 17 | } |
15 | ret = readl(s->regs + r); | 18 | ret = readl(s->regs + r); |
19 | // If an integrated GPU, allow it to suspend again (if idle) | ||
20 | if (s->platd) | ||
21 | pm_runtime_put(s->dev); | ||
16 | // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the | 22 | // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the |
17 | // upper 3 nibbles when there is a possible issue". Further code uses the | 23 | // upper 3 nibbles when there is a possible issue". Further code uses the |
18 | // middle three nibbles as an error code, and ignores the bottom two. | 24 | // middle three nibbles as an error code, and ignores the bottom two. |
@@ -29,16 +35,20 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | |||
29 | return ret; | 35 | return ret; |
30 | } | 36 | } |
31 | 37 | ||
32 | // quadword version of nvdebug_readl() | 38 | // quadword (8-byte) version of nvdebug_readl() |
33 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | 39 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { |
34 | u64 ret; | 40 | u64 ret; |
35 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 41 | // If this is an integrated ("platform") GPU, make sure that it's on first |
42 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | 43 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); |
37 | return -1; | 44 | return -1; |
38 | } | 45 | } |
39 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl | 46 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl |
40 | ret = readl(s->regs + r); | 47 | ret = readl(s->regs + r); |
41 | ret |= ((u64)readl(s->regs + r + 4)) << 32; | 48 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
49 | // If an integrated GPU, allow it to suspend again (if idle) | ||
50 | if (s->platd) | ||
51 | pm_runtime_put(s->dev); | ||
42 | // See comment in nvdebug_readl() regarding error checking | 52 | // See comment in nvdebug_readl() regarding error checking |
43 | if ((ret & 0xfff00000ull) == 0xbad00000ull) { | 53 | if ((ret & 0xfff00000ull) == 0xbad00000ull) { |
44 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); | 54 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); |
@@ -47,23 +57,30 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | |||
47 | return ret; | 57 | return ret; |
48 | } | 58 | } |
49 | 59 | ||
50 | // Similar to nvgpu_writel() | ||
51 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | 60 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { |
52 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 61 | // If this is an integrated ("platform") GPU, make sure that it's on first |
62 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
53 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); | 63 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); |
54 | return; | 64 | return; |
55 | } | 65 | } |
56 | writel_relaxed(v, s->regs + r); | 66 | writel_relaxed(v, s->regs + r); |
57 | wmb(); | 67 | wmb(); |
68 | // If an integrated GPU, allow it to suspend again (if idle) | ||
69 | if (s->platd) | ||
70 | pm_runtime_put(s->dev); | ||
58 | } | 71 | } |
59 | 72 | ||
60 | // quadword version of nvdebug_writel() | 73 | // quadword (8-byte) version of nvdebug_writel() |
61 | // XXX: Not clear this works on all platforms | 74 | // XXX: Not clear this works on all platforms |
62 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | 75 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { |
63 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 76 | // If this is an integrated ("platform") GPU, make sure that it's on first |
77 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
64 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | 78 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); |
65 | return; | 79 | return; |
66 | } | 80 | } |
67 | writeq_relaxed(v, s->regs + r); | 81 | writeq_relaxed(v, s->regs + r); |
68 | wmb(); | 82 | wmb(); |
83 | // If an integrated GPU, allow it to suspend again (if idle) | ||
84 | if (s->platd) | ||
85 | pm_runtime_put(s->dev); | ||
69 | } | 86 | } |