diff options
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | nvdebug.h | 2 | ||||
-rw-r--r-- | nvdebug_entry.c | 44 | ||||
-rw-r--r-- | nvdebug_linux.c | 35 | ||||
-rw-r--r-- | nvdebug_linux.h | 12 |
5 files changed, 61 insertions, 39 deletions
@@ -4,13 +4,6 @@ nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \ | |||
4 | KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" | 4 | KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" |
5 | # -mfentry above if not building due to mcount missing | 5 | # -mfentry above if not building due to mcount missing |
6 | 6 | ||
7 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) | ||
8 | ccflags-y += -I$(PWD)/include | ||
9 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include | ||
10 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu | ||
11 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include | ||
12 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi | ||
13 | |||
14 | all: | 7 | all: |
15 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules | 8 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules |
16 | clean: | 9 | clean: |
@@ -1470,6 +1470,8 @@ struct nvdebug_state { | |||
1470 | struct gk20a *g; | 1470 | struct gk20a *g; |
1471 | // Pointer to PCI device needed for pci_iounmap and pci_resource_start | 1471 | // Pointer to PCI device needed for pci_iounmap and pci_resource_start |
1472 | struct pci_dev *pcid; | 1472 | struct pci_dev *pcid; |
1473 | // Pointer to platform device needed for platform_get_resource | ||
1474 | struct platform_device *platd; | ||
1473 | // Pointer to generic device struct (both platform and pcie devices) | 1475 | // Pointer to generic device struct (both platform and pcie devices) |
1474 | struct device *dev; | 1476 | struct device *dev; |
1475 | }; | 1477 | }; |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index d5df7db..3a10e13 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/pci.h> // For PCI device scanning | 9 | #include <linux/pci.h> // For PCI device scanning |
10 | #include <linux/platform_device.h> // For platform_device struct | ||
10 | #include <linux/proc_fs.h> // So we can set up entries in /proc | 11 | #include <linux/proc_fs.h> // So we can set up entries in /proc |
11 | 12 | ||
12 | #include "nvdebug_linux.h" | 13 | #include "nvdebug_linux.h" |
@@ -114,17 +115,33 @@ int probe_and_cache_devices(void) { | |||
114 | // TODO: Support other platform bus devices (gk20a - TK1) | 115 | // TODO: Support other platform bus devices (gk20a - TK1) |
115 | if (dev) { | 116 | if (dev) { |
116 | mc_boot_0_t ids; | 117 | mc_boot_0_t ids; |
118 | struct platform_device *platd = container_of(dev, struct platform_device, dev); | ||
119 | struct resource *regs = platform_get_resource(platd, IORESOURCE_MEM, 0); | ||
117 | g_nvdebug_state[i].g = get_gk20a(dev); | 120 | g_nvdebug_state[i].g = get_gk20a(dev); |
118 | g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g); | 121 | if (!regs) |
119 | if (!g_nvdebug_state[i].regs) | ||
120 | return -EADDRNOTAVAIL; | 122 | return -EADDRNOTAVAIL; |
121 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | 123 | g_nvdebug_state[i].regs = ioremap(regs->start, resource_size(regs)); |
122 | if (ids.raw == -1) | 124 | if (!g_nvdebug_state[i].regs) { |
125 | printk(KERN_ERR "[nvdebug] Unable to map BAR0 on the integrated GPU\n"); | ||
123 | return -EADDRNOTAVAIL; | 126 | return -EADDRNOTAVAIL; |
124 | g_nvdebug_state[i].chip_id = ids.chip_id; | 127 | } |
128 | // The Jetson TX1, TX2, Xavier, and Orin do not have a BAR2 (but do have | ||
129 | // BAR1). On the TX2+, all their platform resources are: | ||
130 | // [nvdebug] Region 0: Memory at 17000000 [size=16777216] | ||
131 | // [nvdebug] Region 1: Memory at 18000000 [size=16777216] | ||
132 | // [nvdebug] Region 2: Memory at 3b41000 [size=4096] | ||
133 | // The TX1 has the same regions, but at different base addresses. | ||
125 | g_nvdebug_state[i].bar3 = NULL; | 134 | g_nvdebug_state[i].bar3 = NULL; |
126 | g_nvdebug_state[i].pcid = NULL; | 135 | g_nvdebug_state[i].pcid = NULL; |
136 | g_nvdebug_state[i].platd = platd; | ||
127 | g_nvdebug_state[i].dev = dev; | 137 | g_nvdebug_state[i].dev = dev; |
138 | // Don't check Chip ID until everything else is initialized | ||
139 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
140 | if (ids.raw == -1) { | ||
141 | printk(KERN_ERR "[nvdebug] Unable to read config from Master Controller on the integrated GPU\n"); | ||
142 | return -EADDRNOTAVAIL; | ||
143 | } | ||
144 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
128 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", | 145 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.", |
129 | ids.chip_id, ARCH2NAME(ids.architecture)); | 146 | ids.chip_id, ARCH2NAME(ids.architecture)); |
130 | i++; | 147 | i++; |
@@ -140,12 +157,6 @@ int probe_and_cache_devices(void) { | |||
140 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); | 157 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); |
141 | return -EADDRNOTAVAIL; | 158 | return -EADDRNOTAVAIL; |
142 | } | 159 | } |
143 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
144 | if (ids.raw == -1) { | ||
145 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
146 | return -EADDRNOTAVAIL; | ||
147 | } | ||
148 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
149 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) | 160 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) |
150 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); | 161 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); |
151 | // XXX: Try mapping only the lower half of BAR3 on fail | 162 | // XXX: Try mapping only the lower half of BAR3 on fail |
@@ -153,7 +164,15 @@ int probe_and_cache_devices(void) { | |||
153 | if (!g_nvdebug_state[i].bar3) | 164 | if (!g_nvdebug_state[i].bar3) |
154 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | 165 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); |
155 | g_nvdebug_state[i].pcid = pcid; | 166 | g_nvdebug_state[i].pcid = pcid; |
167 | g_nvdebug_state[i].platd = NULL; | ||
156 | g_nvdebug_state[i].dev = &pcid->dev; | 168 | g_nvdebug_state[i].dev = &pcid->dev; |
169 | // Don't check Chip ID until everything else is initialized | ||
170 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
171 | if (ids.raw == -1) { | ||
172 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
173 | return -EADDRNOTAVAIL; | ||
174 | } | ||
175 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
157 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", | 176 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.", |
158 | ids.chip_id, ARCH2NAME(ids.architecture)); | 177 | ids.chip_id, ARCH2NAME(ids.architecture)); |
159 | #if INTERRUPT_DEBUG | 178 | #if INTERRUPT_DEBUG |
@@ -430,6 +449,9 @@ static void __exit nvdebug_exit(void) { | |||
430 | #if INTERRUPT_DEBUG | 449 | #if INTERRUPT_DEBUG |
431 | free_irq(g->pcid->irq, g->pcid); | 450 | free_irq(g->pcid->irq, g->pcid); |
432 | #endif // INTERRUPT_DEBUG | 451 | #endif // INTERRUPT_DEBUG |
452 | } else { | ||
453 | if (g->regs) | ||
454 | iounmap(g->regs); | ||
433 | } | 455 | } |
434 | printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id); | 456 | printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.", g->chip_id); |
435 | } | 457 | } |
diff --git a/nvdebug_linux.c b/nvdebug_linux.c index 830ec6e..111d5aa 100644 --- a/nvdebug_linux.c +++ b/nvdebug_linux.c | |||
@@ -3,16 +3,22 @@ | |||
3 | */ | 3 | */ |
4 | #include "nvdebug_linux.h" | 4 | #include "nvdebug_linux.h" |
5 | #include <asm/io.h> // For read[l,q] and write[l,q] | 5 | #include <asm/io.h> // For read[l,q] and write[l,q] |
6 | #include <linux/pm_runtime.h> // For pm_runtime_[enabled,get,put]() | ||
6 | 7 | ||
7 | // Similar to nvgpu_readl() | ||
8 | // (except we don't try to resolve situations where regs is NULL) | ||
9 | u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | 8 | u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { |
10 | u32 ret; | 9 | u32 ret; |
11 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 10 | // If this is an integrated ("platform") GPU, make sure that it's on first |
11 | // (pm_runtime_enabled() will return false until nvgpu is started. Once | ||
12 | // nvgpu is started, pm_runtime_get() will attempt to resume the GPU.) | ||
13 | // This works to bring up the TX2, Xavier, and Orin, but not the TX1. | ||
14 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
12 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); | 15 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); |
13 | return -1; | 16 | return -1; |
14 | } | 17 | } |
15 | ret = readl(s->regs + r); | 18 | ret = readl(s->regs + r); |
19 | // If an integrated GPU, allow it to suspend again (if idle) | ||
20 | if (s->platd) | ||
21 | pm_runtime_put(s->dev); | ||
16 | // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the | 22 | // According to open-gpu-kernel-modules, the GPU "will return 0xbad in the |
17 | // upper 3 nibbles when there is a possible issue". Further code uses the | 23 | // upper 3 nibbles when there is a possible issue". Further code uses the |
18 | // middle three nibbles as an error code, and ignores the bottom two. | 24 | // middle three nibbles as an error code, and ignores the bottom two. |
@@ -29,16 +35,20 @@ u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | |||
29 | return ret; | 35 | return ret; |
30 | } | 36 | } |
31 | 37 | ||
32 | // quadword version of nvdebug_readl() | 38 | // quadword (8-byte) version of nvdebug_readl() |
33 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | 39 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { |
34 | u64 ret; | 40 | u64 ret; |
35 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 41 | // If this is an integrated ("platform") GPU, make sure that it's on first |
42 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | 43 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); |
37 | return -1; | 44 | return -1; |
38 | } | 45 | } |
39 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl | 46 | // readq seems to always (?) return the uppermost 32 bits as 0, so workaround with readl |
40 | ret = readl(s->regs + r); | 47 | ret = readl(s->regs + r); |
41 | ret |= ((u64)readl(s->regs + r + 4)) << 32; | 48 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
49 | // If an integrated GPU, allow it to suspend again (if idle) | ||
50 | if (s->platd) | ||
51 | pm_runtime_put(s->dev); | ||
42 | // See comment in nvdebug_readl() regarding error checking | 52 | // See comment in nvdebug_readl() regarding error checking |
43 | if ((ret & 0xfff00000ull) == 0xbad00000ull) { | 53 | if ((ret & 0xfff00000ull) == 0xbad00000ull) { |
44 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); | 54 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data of %#18llx\n", r, ret); |
@@ -47,23 +57,30 @@ u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | |||
47 | return ret; | 57 | return ret; |
48 | } | 58 | } |
49 | 59 | ||
50 | // Similar to nvgpu_writel() | ||
51 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | 60 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { |
52 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 61 | // If this is an integrated ("platform") GPU, make sure that it's on first |
62 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
53 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); | 63 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); |
54 | return; | 64 | return; |
55 | } | 65 | } |
56 | writel_relaxed(v, s->regs + r); | 66 | writel_relaxed(v, s->regs + r); |
57 | wmb(); | 67 | wmb(); |
68 | // If an integrated GPU, allow it to suspend again (if idle) | ||
69 | if (s->platd) | ||
70 | pm_runtime_put(s->dev); | ||
58 | } | 71 | } |
59 | 72 | ||
60 | // quadword version of nvdebug_writel() | 73 | // quadword (8-byte) version of nvdebug_writel() |
61 | // XXX: Not clear this works on all platforms | 74 | // XXX: Not clear this works on all platforms |
62 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | 75 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { |
63 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | 76 | // If this is an integrated ("platform") GPU, make sure that it's on first |
77 | if (s->platd && (!pm_runtime_enabled(s->dev) || pm_runtime_get(s->dev) < 0)) { | ||
64 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | 78 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); |
65 | return; | 79 | return; |
66 | } | 80 | } |
67 | writeq_relaxed(v, s->regs + r); | 81 | writeq_relaxed(v, s->regs + r); |
68 | wmb(); | 82 | wmb(); |
83 | // If an integrated GPU, allow it to suspend again (if idle) | ||
84 | if (s->platd) | ||
85 | pm_runtime_put(s->dev); | ||
69 | } | 86 | } |
diff --git a/nvdebug_linux.h b/nvdebug_linux.h index 022d1cf..2ad4ce1 100644 --- a/nvdebug_linux.h +++ b/nvdebug_linux.h | |||
@@ -35,15 +35,3 @@ static inline int file2parentgpuidx(const struct file *f) { | |||
35 | // module. | 35 | // module. |
36 | return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode); | 36 | return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode); |
37 | } | 37 | } |
38 | |||
39 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | ||
40 | // Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry | ||
41 | // of the gk20a struct (after a function pointer). This change was made as L4T | ||
42 | // was upgraded from Linux 4.9 to 5.10 (r32 -> r34+) | ||
43 | // Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA | ||
44 | // i.e. if FUSA was enabled, this is wrong. | ||
45 | #define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void)))) | ||
46 | #else | ||
47 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs | ||
48 | #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) | ||
49 | #endif | ||