aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.c
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.c')
-rw-r--r--nvdebug.c278
1 files changed, 278 insertions, 0 deletions
diff --git a/nvdebug.c b/nvdebug.c
new file mode 100644
index 0000000..31a797e
--- /dev/null
+++ b/nvdebug.c
@@ -0,0 +1,278 @@
1/* Copyright 2021 Joshua Bakita
2 * SPDX-License-Identifier: MIT
3 */
4
5/* TODO
6 * - Add /proc /sys or debugfs interface
7 * - Add API to trigger a preemption
8 */
9
10#include <linux/module.h>
11#include <linux/kernel.h>
12#include <linux/device.h>
13#include <linux/kallsyms.h>
14#include <linux/iommu.h> // For struct iommu_domain
15#include <asm/io.h>
16
17/* Currently used symbols:
18 * - struct gk20a;
19 * - struct nvgpu_os_linux;
20 * - void nvgpu_writel(struct gk20a *g, u32 reg_addr, u32 value);
21 */
22#include <nvgpu/io.h>
23#include <nvgpu/gk20a.h>
24#include <os/linux/os_linux.h>
25
26#include "nvdebug.h"
27
28MODULE_LICENSE("GPL"); // LIAR
29MODULE_AUTHOR("Joshua Bakita");
30MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
31MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now
32
33// Bus types are global symbols in the kernel
34extern struct bus_type platform_bus_type;
35
36static inline struct gk20a *get_gk20a(struct device *dev) {
37 // XXX: Only works because gk20a* is the first member of gk20a_platform
38 return *((struct gk20a**)dev_get_drvdata(dev));
39}
40
41// Functionally identical to nvgpu_readl()
42// (except we don't try to resolve situations where regs is NULL)
43static inline u32 nvdebug_readl(struct gk20a* g, u32 r) {
44 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g);
45 if (unlikely(!g_os->regs)) {
46 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n");
47 return -1;
48 }
49 return readl(g_os->regs + r);
50}
51
52// Functionally identical to nvgpu_writel()
53static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) {
54 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g);
55 if (unlikely(!g_os->regs)) {
56 printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n");
57 return;
58 }
59 writel_relaxed(v, g_os->regs + r);
60 wmb();
61}
62/*
63#define RUNLIST_PROCFS_NAME "runlist"
64
65static const struct seq_operations runlist_file_seq_ops = {
66 .start =
67 .next =
68 .stop =
69 .show =
70};
71
72static const struct file_operations runlist_file_ops = {
73 .read =
74*/
75/*static void read_bytes(struct gk20a *g, void* target, u32 start, u32 num_bytes) {
76 u32 *output = target;
77 u32 i;
78 // Read u32s from the GPU
79 for (i = 0; i < num_bytes; i += 4) {
80 output[i/4] = _nvgpu_readl(g, start + i);
81 printk(KERN_INFO "[nvdebug] U32 %d: %0x\n", i, output[i/4]);
82 }
83}
84
85static void read_bytes(void* target, void* start, u32 num_bytes) {
86 u32 *output = target;
87 u32 i;
88 // Read u32s from the GPU
89 for (i = 0; i < num_bytes; i += 4) {
90 output[i/4] = readl(start + i);
91 printk(KERN_INFO "[nvdebug] U32 %d: %0x\n", i, output[i/4]);
92 }
93}*/
94
95/*
96 +---- TSG Entry %d ----+
97 | Scale: %d |
98 | Timeout: %d |
99 +----------------------+
100
101
102
103
104
105
106*/
107
108#define PRE KERN_INFO "[nvdebug] "
109
110static void nvdebug_print_tsg(struct entry_tsg* tsg) {
111 if (tsg->entry_type != ENTRY_TYPE_TSG) {
112 printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in nvdebug_print_tsg()!\n");
113 return;
114 }
115 printk(PRE "+---- TSG Entry %-2d----+", tsg->tsgid);
116 printk(PRE "| Scale: %-13d|", tsg->timeslice_scale);
117 printk(PRE "| Timeout: %-11d|", tsg->timeslice_timeout);
118 printk(PRE "+---------------------+");
119}
120
121static void nvdebug_print_chan(struct runlist_chan* chan) {
122 char* loc_txt;
123 u64 inst_ptr;
124 if (chan->entry_type != ENTRY_TYPE_CHAN) {
125 printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in nvdebug_print_channel()!\n");
126 return;
127 }
128 switch (chan->inst_target) {
129 case TARGET_VID_MEM:
130 loc_txt = "VID_MEM";
131 break;
132 case TARGET_SYS_MEM_COHERENT:
133 loc_txt = "SYS_MEM_COHERENT";
134 break;
135 case TARGET_SYS_MEM_NONCOHERENT:
136 loc_txt = "SYS_MEM_NONCOHERENT";
137 break;
138 default:
139 printk(KERN_WARNING "[nvdebug] Invalid aperture in runlist channel!\n");
140 return;
141 }
142 // Reconstruct pointer to channel instance block
143 inst_ptr = chan->inst_ptr_hi;
144 inst_ptr <<= 32;
145 inst_ptr |= chan->inst_ptr_lo << 12;
146
147 printk(PRE " +- Channel Entry %-4d-+", chan->chid);
148 printk(PRE " | Runqueue Selector: %d|", chan->runqueue_selector);
149 printk(PRE " | Instance PTR: |");
150 printk(PRE " | %#018llx |", inst_ptr);
151 printk(PRE " | %-20s|", loc_txt);
152 printk(PRE " +---------------------+");
153}
154
/*
 * Iterate `chan` over the channel entries of `tsg`: the (tsg)->tsg_length
 * struct runlist_chan records laid out directly after the TSG header.
 *
 * Arguments are fully parenthesized so that expressions such as `&entry` can
 * be passed. Note that `tsg` is evaluated on every loop test, so it must not
 * have side effects.
 */
#define for_chan_in_tsg(chan, tsg) \
	for ((chan) = (struct runlist_chan *)((tsg) + 1); \
	     (chan) < (struct runlist_chan *)((tsg) + 1) + (tsg)->tsg_length; \
	     (chan)++)

/*
 * Address of the entry following `tsg` and all of its channel entries,
 * yielded as a void * (as before) so it converts implicitly to any entry
 * pointer type. The expansion is wrapped in parentheses so it can be used
 * safely inside larger expressions.
 */
#define next_tsg(tsg) \
	((void *)((struct runlist_chan *)((tsg) + 1) + (tsg)->tsg_length))
162
163static void nvdebug_print_runlist(struct entry_tsg* head, runlist_info_t rl_info) {
164 int rl_idx = 0;
165 struct runlist_chan* chan;
166 printk(PRE "tsg->tsg_length: %d\n", head->tsg_length);
167 printk(PRE "rl_info.len: %d\n", rl_info.len);
168 while (rl_idx < rl_info.len) {
169 nvdebug_print_tsg(head);
170 for_chan_in_tsg(chan, head) {
171 nvdebug_print_chan(chan);
172 }
173 rl_idx += 1 + head->tsg_length;
174 head = next_tsg(head);
175 }
176}
177
178static int __init nvdebug_init(void) {
179 struct device *dev = NULL;
180 struct device *temp_dev;
181 struct gk20a *g;
182 struct entry_tsg head;
183 runlist_base_t rl_base;
184 runlist_info_t rl_info;
185 u64 runlist_iova;
186 // Get the last device that matches our name
187 while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) {
188 printk(KERN_INFO "Found a matching device\n");
189 dev = temp_dev;
190 }
191 if (!dev)
192 return -EIO;
193 g = get_gk20a(dev);
194 // This address seems to not be:
195 // - A GPU address (type is sysmem_coherent)
196 // - A physical address (dereferencing after ioremap crashes)
197 // - A kernel virtual address (dereferencing segfaults)
198 // So maybe it's some sort of custom thing? This is an address that the GPU
199 // can use, so it would make most sense for it to be a physical address.
200 //
201 // BUT, it can't possibly be a physical address, as it would refer to an
202 // address greater than the maximum one on our system (by a lot!).
203 // Maybe I'm reading the runlist base wrong?
204 // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
205 // address! So, what's this I/O address space? All I know is that it's what
206 // nvgpu_mem_get_addr() returns. That function returns the result of either:
207 // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?)
208 // converts an IPA to a PA?
209 // - nvgpu_mem_iommu_translate
210 //
211 // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
212 // returns SYSMEM.
213 //
214 // To convert a physical address to a IOMMU address, we add a bit
215 //
216 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
217 // before because the GPU had simply gone to sleep and invalidated its
218 // register state, so nvgpu_readl() was simply returning garbage.
219
220 printk(KERN_INFO "[nvdebug] Pulling runlist base address from %x\n", NV_PFIFO_RUNLIST_BASE);
221 printk(KERN_INFO "[nvdebug] Using struct gk20a* of %px\n", g);
222 printk(KERN_INFO "[nvdebug] g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n", g->name, g->power_on, g->sw_ready, g->is_virtual);
223 struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g);
224 printk(KERN_INFO "[nvdebug] l->regs %px, l->regs_saved %px\n", l->regs, l->regs_saved);
225 if (!l->regs)
226 return -EIO;
227 rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE);
228 rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
229 runlist_iova = ((u64)rl_base.ptr) << 12;
230 printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova);
231 // Segfaults
232 //u32 attempted_read = ioread32(runlist_iova);
233 //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read);
234
235 // Errors out
236 //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg));
237 //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr);
238
239 /* Overcomplicated?
240 struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
241 if (!domain) {
242 printk(KERN_INFO "[nvdebug] No IOMMU domain!\n");
243 return -EIO;
244 }
245 u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova);
246 printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr);
247 */
248
249 printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova));
250 printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova));
251 head = *(struct entry_tsg*)phys_to_virt(runlist_iova);
252 nvdebug_print_runlist((struct entry_tsg*)phys_to_virt(runlist_iova), rl_info);
253 //nvdebug_print_tsg(&head);
254 //nvdebug_print_chan((struct runlist_chan*)(phys_to_virt(runlist_iova) + sizeof(struct entry_tsg)));
255 //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
256 //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
257 //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
258 //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
259 //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid);
260
261 //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL));
262 //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL));
263 //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes
264 //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg));
265 /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
266 printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
267 printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
268 printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
269 printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid);
270 */return 0;
271}
272
273static void __exit nvdebug_exit(void) {
274 printk(KERN_INFO "[nvdebug] Exiting...\n");
275}
276
277module_init(nvdebug_init);
278module_exit(nvdebug_exit);