diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 15:38:53 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 15:38:53 -0400 |
| commit | 3653aee74ae8338b9da1f0304b0eaa1171dd640f (patch) | |
| tree | 554c12f901c209b962ca9dac48bde9d3c59f2281 | |
| parent | 48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (diff) | |
Correctly check for read errors in the nvdebug_read* functions
Follows how NVIDIA's open-source GPU driver checks for bad reads.
| -rw-r--r-- | nvdebug_linux.c | 24 |
1 file changed, 16 insertions, 8 deletions
diff --git a/nvdebug_linux.c b/nvdebug_linux.c index 1d76bc9..830ec6e 100644 --- a/nvdebug_linux.c +++ b/nvdebug_linux.c | |||
| @@ -13,9 +13,17 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | |||
| 13 | return -1; | 13 | return -1; |
| 14 | } | 14 | } |
| 15 | ret = readl(s->regs + r); | 15 | ret = readl(s->regs + r); |
| 16 | // It seems like the GPU returns this as a flag value for bad addresses | 16 | // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the |
| 17 | if (ret == 0xbadf5040) { | 17 | // upper 3 nibbles when there is a possible issue". Further code uses the |
| 18 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r); | 18 | // middle three nibbles as an error code, and ignores the bottom two. |
| 19 | if ((ret & 0xfff00000) == 0xbad00000) { | ||
| 20 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data of %#10x\n", r, ret); | ||
| 21 | // It would be best to check INTR_0_PRI_* error is pending, to verify | ||
| 22 | // that this was actually a bad read. Possible future work... | ||
| 23 | // Generally a failure here in the context of nvdebug indicates that a | ||
| 24 | // register does not exist on this platform, but one can know for sure | ||
| 25 | // by checking which NV_PPRIV_SYS_PRI_ERROR_CODE_* define the bad read | ||
| 26 | // matches. | ||
| 19 | return -1; | 27 | return -1; |
| 20 | } | 28 | } |
| 21 | return ret; | 29 | return ret; |
| @@ -28,12 +36,12 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | |||
| 28 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | 36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); |
| 29 | return -1; | 37 | return -1; |
| 30 | } | 38 | } |
| 31 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | 39 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl |
| 32 | ret = readl(s->regs + r); | 40 | ret = readl(s->regs + r); |
| 33 | ret |= ((u64)readl(s->regs + r + 4)) << 32; | 41 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
| 34 | // It seems like the GPU returns this as a flag value for bad addresses | 42 | // See comment in nvdebug_readl() regarding error checking |
| 35 | if ((ret & 0xffffffffull) == 0xbadf5040ull) { | 43 | if ((ret & 0xfff00000ull) == 0xbad00000ull) { |
| 36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r); | 44 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); |
| 37 | return -1; | 45 | return -1; |
| 38 | } | 46 | } |
| 39 | return ret; | 47 | return ret; |
| @@ -50,7 +58,7 @@ void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | |||
| 50 | } | 58 | } |
| 51 | 59 | ||
| 52 | // quadword version of nvdebug_writel() | 60 | // quadword version of nvdebug_writel() |
| 53 | // XXX: This probably doesn't work XXX: Untested | 61 | // XXX: Not clear this works on all platforms |
| 54 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | 62 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { |
| 55 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 63 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
| 56 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | 64 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); |
