aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile7
-rw-r--r--nvdebug.h2
-rw-r--r--nvdebug_entry.c44
-rw-r--r--nvdebug_linux.c35
-rw-r--r--nvdebug_linux.h12
5 files changed, 61 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index 972bc0b..fea3819 100644
--- a/Makefile
+++ b/Makefile
@@ -4,13 +4,6 @@ nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \
4KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" 4KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
5# -mfentry above if not building due to mcount missing 5# -mfentry above if not building due to mcount missing
6 6
7# TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...)
8ccflags-y += -I$(PWD)/include
9#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include
10#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu
11#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include
12#ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi
13
14all: 7all:
15 make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules 8 make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
16clean: 9clean:
diff --git a/nvdebug.h b/nvdebug.h
index 80f1a74..ca0f514 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -1470,6 +1470,8 @@ struct nvdebug_state {
1470 struct gk20a *g; 1470 struct gk20a *g;
1471 // Pointer to PCI device needed for pci_iounmap and pci_resource_start 1471 // Pointer to PCI device needed for pci_iounmap and pci_resource_start
1472 struct pci_dev *pcid; 1472 struct pci_dev *pcid;
1473 // Pointer to platform device needed for platform_get_resource
1474 struct platform_device *platd;
1473 // Pointer to generic device struct (both platform and pcie devices) 1475 // Pointer to generic device struct (both platform and pcie devices)
1474 struct device *dev; 1476 struct device *dev;
1475}; 1477};
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index d5df7db..3a10e13 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -7,6 +7,7 @@
7#include <linux/kernel.h> 7#include <linux/kernel.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/pci.h> // For PCI device scanning 9#include <linux/pci.h> // For PCI device scanning
10#include <linux/platform_device.h> // For platform_device struct
10#include <linux/proc_fs.h> // So we can set up entries in /proc 11#include <linux/proc_fs.h> // So we can set up entries in /proc
11 12
12#include "nvdebug_linux.h" 13#include "nvdebug_linux.h"
@@ -114,17 +115,33 @@ int probe_and_cache_devices(void) {
114 // TODO: Support other platform bus devices (gk20a - TK1) 115 // TODO: Support other platform bus devices (gk20a - TK1)
115 if (dev) { 116 if (dev) {
116 mc_boot_0_t ids; 117 mc_boot_0_t ids;
118 struct platform_device *platd = container_of(dev, struct platform_device, dev);
119 struct resource *regs = platform_get_resource(platd, IORESOURCE_MEM, 0);
117 g_nvdebug_state[i].g = get_gk20a(dev); 120 g_nvdebug_state[i].g = get_gk20a(dev);
118 g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g); 121 if (!regs)
119 if (!g_nvdebug_state[i].regs)
120 return -EADDRNOTAVAIL; 122 return -EADDRNOTAVAIL;
121 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); 123 g_nvdebug_state[i].regs = ioremap(regs->start, resource_size(regs));
122 if (ids.raw == -1) 124 if (!g_nvdebug_state[i].regs) {
125 printk(KERN_ERR "[nvdebug] Unable to map BAR0 on the integrated GPU\n");
123 return -EADDRNOTAVAIL; 126 return -EADDRNOTAVAIL;
124 g_nvdebug_state[i].chip_id = ids.chip_id; 127 }
128 // The Jetson TX1, TX2, Xavier, and Orin do not have a BAR2 (but do have
129 // BAR1). On the TX2+, all their platform resources are:
130 // [nvdebug] Region 0: Memory at 17000000 [size=16777216]
131 // [nvdebug] Region 1: Memory at 18000000 [size=16777216]
132 // [nvdebug] Region 2: Memory at 3b41000 [size=4096]
133 // The TX1 has the same regions, but at different base addresses.
125 g_nvdebug_state[i].bar3 = NULL; 134 g_nvdebug_state[i].bar3 = NULL;
126 g_nvdebug_state[i].pcid = NULL; 135 g_nvdebug_state[i].pcid = NULL;
136 g_nvdebug_state[i].platd = platd;
127 g_nvdebug_state[i].dev = dev; 137 g_nvdebug_state[i].dev = dev;
138 // Don't check Chip ID until everything else is initialized
139 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
140 if (ids.raw == -1) {
141 printk(KERN_ERR "[nvdebug] Unable to read config from Master Controller on the integrated GPU\n");
142 return -EADDRNOTAVAIL;
143 }
144 g_nvdebug_state[i].chip_id = ids.chip_id;
128 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", 145 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.",
129 ids.chip_id, ARCH2NAME(ids.architecture)); 146 ids.chip_id, ARCH2NAME(ids.architecture));
130 i++; 147 i++;
@@ -140,12 +157,6 @@ int probe_and_cache_devices(void) {
140 pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); 157 pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n");
141 return -EADDRNOTAVAIL; 158 return -EADDRNOTAVAIL;
142 } 159 }
143 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
144 if (ids.raw == -1) {
145 pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
146 return -EADDRNOTAVAIL;
147 }
148 g_nvdebug_state[i].chip_id = ids.chip_id;
149 // Map BAR3 (CPU-accessible mappings of GPU DRAM) 160 // Map BAR3 (CPU-accessible mappings of GPU DRAM)
150 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); 161 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0);
151 // XXX: Try mapping only the lower half of BAR3 on fail 162 // XXX: Try mapping only the lower half of BAR3 on fail
@@ -153,7 +164,15 @@ int probe_and_cache_devices(void) {
153 if (!g_nvdebug_state[i].bar3) 164 if (!g_nvdebug_state[i].bar3)
154 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); 165 g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2);
155 g_nvdebug_state[i].pcid = pcid; 166 g_nvdebug_state[i].pcid = pcid;
167 g_nvdebug_state[i].platd = NULL;
156 g_nvdebug_state[i].dev = &pcid->dev; 168 g_nvdebug_state[i].dev = &pcid->dev;
169 // Don't check Chip ID until everything else is initialized
170 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
171 if (ids.raw == -1) {
172 pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n");
173 return -EADDRNOTAVAIL;
174 }
175 g_nvdebug_state[i].chip_id = ids.chip_id;
157 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", 176 printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.",
158 ids.chip_id, ARCH2NAME(ids.architecture)); 177 ids.chip_id, ARCH2NAME(ids.architecture));
159#if INTERRUPT_DEBUG 178#if INTERRUPT_DEBUG
@@ -430,6 +449,9 @@ static void __exit nvdebug_exit(void) {
430#if INTERRUPT_DEBUG 449#if INTERRUPT_DEBUG
431 free_irq(g->pcid->irq, g->pcid); 450 free_irq(g->pcid->irq, g->pcid);
432#endif // INTERRUPT_DEBUG 451#endif // INTERRUPT_DEBUG
452 } else {
453 if (g->regs)
454 iounmap(g->regs);
433 } 455 }
434 printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id); 456 printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id);
435 } 457 }
diff --git a/nvdebug_linux.c b/nvdebug_linux.c
index 830ec6e..111d5aa 100644
--- a/nvdebug_linux.c
+++ b/nvdebug_linux.c
@@ -3,16 +3,22 @@
3 */ 3 */
4#include "nvdebug_linux.h" 4#include "nvdebug_linux.h"
5#include <asm/io.h> // For read[l,q] and write[l,q] 5#include <asm/io.h> // For read[l,q] and write[l,q]
6#include <linux/pm_runtime.h> // For pm_runtime_[enabled,get,put]()
6 7
7// Similar to nvgpu_readl()
8// (except we don't try to resolve situations where regs is NULL)
9u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { 8u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
10 u32 ret; 9 u32 ret;
11 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { 10 // If this is an integrated ("platform") GPU, make sure that it's on first
11 // (pm_runtime_enabled() will return false until nvgpu is started. Once
12 // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.)
13 // This works to bring up the TX2, Xavier, and Orin, but not the TX1.
14 if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
12 printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); 15 printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
13 return -1; 16 return -1;
14 } 17 }
15 ret = readl(s->regs + r); 18 ret = readl(s->regs + r);
19 // If an integrated GPU, allow it to suspend again (if idle)
20 if (s->platd)
21 pm_runtime_put(s->dev);
16 // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the 22 // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the
17 // upper 3 nibbles when there is a possible issue". Further code uses the 23 // upper 3 nibbles when there is a possible issue". Further code uses the
18 // middle three nibbles as an error code, and ignores the bottom two. 24 // middle three nibbles as an error code, and ignores the bottom two.
@@ -29,16 +35,20 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
29 return ret; 35 return ret;
30} 36}
31 37
32// quadword version of nvdebug_readl() 38// quadword (8-byte) version of nvdebug_readl()
33u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { 39u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
34 u64 ret; 40 u64 ret;
35 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { 41 // If this is an integrated ("platform") GPU, make sure that it's on first
42 if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
36 printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); 43 printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
37 return -1; 44 return -1;
38 } 45 }
39 // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl 46 // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl
40 ret = readl(s->regs + r); 47 ret = readl(s->regs + r);
41 ret |= ((u64)readl(s->regs + r + 4)) << 32; 48 ret |= ((u64)readl(s->regs + r + 4)) << 32;
49 // If an integrated GPU, allow it to suspend again (if idle)
50 if (s->platd)
51 pm_runtime_put(s->dev);
42 // See comment in nvdebug_readl() regarding error checking 52 // See comment in nvdebug_readl() regarding error checking
43 if ((ret & 0xfff00000ull) == 0xbad00000ull) { 53 if ((ret & 0xfff00000ull) == 0xbad00000ull) {
44 printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); 54 printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret);
@@ -47,23 +57,30 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
47 return ret; 57 return ret;
48} 58}
49 59
50// Similar to nvgpu_writel()
51void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { 60void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
52 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { 61 // If this is an integrated ("platform") GPU, make sure that it's on first
62 if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
53 printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); 63 printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
54 return; 64 return;
55 } 65 }
56 writel_relaxed(v, s->regs + r); 66 writel_relaxed(v, s->regs + r);
57 wmb(); 67 wmb();
68 // If an integrated GPU, allow it to suspend again (if idle)
69 if (s->platd)
70 pm_runtime_put(s->dev);
58} 71}
59 72
60// quadword version of nvdebug_writel() 73// quadword (8-byte) version of nvdebug_writel()
61// XXX: Not clear this works on all platforms 74// XXX: Not clear this works on all platforms
62void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { 75void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
63 if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { 76 // If this is an integrated ("platform") GPU, make sure that it's on first
77 if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) {
64 printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); 78 printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
65 return; 79 return;
66 } 80 }
67 writeq_relaxed(v, s->regs + r); 81 writeq_relaxed(v, s->regs + r);
68 wmb(); 82 wmb();
83 // If an integrated GPU, allow it to suspend again (if idle)
84 if (s->platd)
85 pm_runtime_put(s->dev);
69} 86}
diff --git a/nvdebug_linux.h b/nvdebug_linux.h
index 022d1cf..2ad4ce1 100644
--- a/nvdebug_linux.h
+++ b/nvdebug_linux.h
@@ -35,15 +35,3 @@ static inline int file2parentgpuidx(const struct file *f) {
35 // module. 35 // module.
36 return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode); 36 return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode);
37} 37}
38
39#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
40// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
41// of the gk20a struct (after a function pointer). This change was made as L4T
42// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
43// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
44// i.e. if FUSA was enabled, this is wrong.
45#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void))))
46#else
47#include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs
48#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
49#endif