diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 15:38:53 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 15:38:53 -0400 |
commit | 3653aee74ae8338b9da1f0304b0eaa1171dd640f (patch) | |
tree | 554c12f901c209b962ca9dac48bde9d3c59f2281 | |
parent | 48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (diff) |
Correctly check for read errors in the nvdebug_read* functions
Follows how NVIDIA's open-source GPU driver checks for bad reads.
-rw-r--r-- | nvdebug_linux.c | 24 |
1 file changed, 16 insertions, 8 deletions
diff --git a/nvdebug_linux.c b/nvdebug_linux.c index 1d76bc9..830ec6e 100644 --- a/nvdebug_linux.c +++ b/nvdebug_linux.c | |||
@@ -13,9 +13,17 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | |||
13 | return -1; | 13 | return -1; |
14 | } | 14 | } |
15 | ret = readl(s->regs + r); | 15 | ret = readl(s->regs + r); |
16 | // It seems like the GPU returns this as a flag value for bad addresses | 16 | // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the |
17 | if (ret == 0xbadf5040) { | 17 | // upper 3 nibbles when there is a possible issue". Further code uses the |
18 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r); | 18 | // middle three nibbles as an error code, and ignores the bottom two. |
19 | if ((ret & 0xfff00000) == 0xbad00000) { | ||
20 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data of %#10x\n", r, ret); | ||
21 | // It would be best to check INTR_0_PRI_* error is pending, to verify | ||
22 | // that this was actually a bad read. Possible future work... | ||
23 | // Generally a failure here in the context of nvdebug indicates that a | ||
24 | // register does not exist on this platform, but one can know for sure | ||
25 | // by checking which NV_PPRIV_SYS_PRI_ERROR_CODE_* define the bad read | ||
26 | // matches. | ||
19 | return -1; | 27 | return -1; |
20 | } | 28 | } |
21 | return ret; | 29 | return ret; |
@@ -28,12 +36,12 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | |||
28 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | 36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); |
29 | return -1; | 37 | return -1; |
30 | } | 38 | } |
31 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | 39 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl |
32 | ret = readl(s->regs + r); | 40 | ret = readl(s->regs + r); |
33 | ret |= ((u64)readl(s->regs + r + 4)) << 32; | 41 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
34 | // It seems like the GPU returns this as a flag value for bad addresses | 42 | // See comment in nvdebug_readl() regarding error checking |
35 | if ((ret & 0xffffffffull) == 0xbadf5040ull) { | 43 | if ((ret & 0xfff00000ull) == 0xbad00000ull) { |
36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r); | 44 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); |
37 | return -1; | 45 | return -1; |
38 | } | 46 | } |
39 | return ret; | 47 | return ret; |
@@ -50,7 +58,7 @@ void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | |||
50 | } | 58 | } |
51 | 59 | ||
52 | // quadword version of nvdebug_writel() | 60 | // quadword version of nvdebug_writel() |
53 | // XXX: This probably doesn't work XXX: Untested | 61 | // XXX: Not clear this works on all platforms |
54 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | 62 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { |
55 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 63 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
56 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | 64 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); |