diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | nvdebug.c | 278 | ||||
-rw-r--r-- | nvdebug.h | 49 | ||||
-rw-r--r-- | nvdebug_entry.c | 40 | ||||
-rw-r--r-- | runlist.c | 106 | ||||
-rw-r--r-- | runlist_procfs.c | 102 |
6 files changed, 298 insertions, 278 deletions
@@ -1,4 +1,5 @@ | |||
1 | obj-m += nvdebug.o | 1 | obj-m += nvdebug.o |
2 | nvdebug-objs = runlist_procfs.o runlist.o nvdebug_entry.o | ||
2 | 3 | ||
3 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) | 4 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) |
4 | #ccflags-y += -I$(PWD)/include | 5 | #ccflags-y += -I$(PWD)/include |
diff --git a/nvdebug.c b/nvdebug.c deleted file mode 100644 index 31a797e..0000000 --- a/nvdebug.c +++ /dev/null | |||
@@ -1,278 +0,0 @@ | |||
1 | /* Copyright 2021 Joshua Bakita | ||
2 | * SPDX-License-Identifier: MIT | ||
3 | */ | ||
4 | |||
5 | /* TODO | ||
6 | * - Add /proc /sys or debugfs interface | ||
7 | * - Add API to trigger a preemption | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/device.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/iommu.h> // For struct iommu_domain | ||
15 | #include <asm/io.h> | ||
16 | |||
17 | /* Currently used symbols: | ||
18 | * - struct gk20a; | ||
19 | * - struct nvgpu_os_linux; | ||
20 | * - void nvgpu_writel(struct gk20a *g, u32 reg_addr, u32 value); | ||
21 | */ | ||
22 | #include <nvgpu/io.h> | ||
23 | #include <nvgpu/gk20a.h> | ||
24 | #include <os/linux/os_linux.h> | ||
25 | |||
26 | #include "nvdebug.h" | ||
27 | |||
28 | MODULE_LICENSE("GPL"); // LIAR | ||
29 | MODULE_AUTHOR("Joshua Bakita"); | ||
30 | MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | ||
31 | MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now | ||
32 | |||
33 | // Bus types are global symbols in the kernel | ||
34 | extern struct bus_type platform_bus_type; | ||
35 | |||
36 | static inline struct gk20a *get_gk20a(struct device *dev) { | ||
37 | // XXX: Only works because gk20a* is the first member of gk20a_platform | ||
38 | return *((struct gk20a**)dev_get_drvdata(dev)); | ||
39 | } | ||
40 | |||
41 | // Functionally identical to nvgpu_readl() | ||
42 | // (except we don't try to resolve situations where regs is NULL) | ||
43 | static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { | ||
44 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | ||
45 | if (unlikely(!g_os->regs)) { | ||
46 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | ||
47 | return -1; | ||
48 | } | ||
49 | return readl(g_os->regs + r); | ||
50 | } | ||
51 | |||
52 | // Functionally identical to nvgpu_writel() | ||
53 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { | ||
54 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | ||
55 | if (unlikely(!g_os->regs)) { | ||
56 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); | ||
57 | return; | ||
58 | } | ||
59 | writel_relaxed(v, g_os->regs + r); | ||
60 | wmb(); | ||
61 | } | ||
62 | /* | ||
63 | #define RUNLIST_PROCFS_NAME "runlist" | ||
64 | |||
65 | static const struct seq_operations runlist_file_seq_ops = { | ||
66 | .start = | ||
67 | .next = | ||
68 | .stop = | ||
69 | .show = | ||
70 | }; | ||
71 | |||
72 | static const struct file_operations runlist_file_ops = { | ||
73 | .read = | ||
74 | */ | ||
75 | /*static void read_bytes(struct gk20a *g, void* target, u32 start, u32 num_bytes) { | ||
76 | u32 *output = target; | ||
77 | u32 i; | ||
78 | // Read u32s from the GPU | ||
79 | for (i = 0; i < num_bytes; i += 4) { | ||
80 | output[i/4] = _nvgpu_readl(g, start + i); | ||
81 | printk(KERN_INFO "[nvdebug] U32 %d: %0x\n", i, output[i/4]); | ||
82 | } | ||
83 | } | ||
84 | |||
85 | static void read_bytes(void* target, void* start, u32 num_bytes) { | ||
86 | u32 *output = target; | ||
87 | u32 i; | ||
88 | // Read u32s from the GPU | ||
89 | for (i = 0; i < num_bytes; i += 4) { | ||
90 | output[i/4] = readl(start + i); | ||
91 | printk(KERN_INFO "[nvdebug] U32 %d: %0x\n", i, output[i/4]); | ||
92 | } | ||
93 | }*/ | ||
94 | |||
95 | /* | ||
96 | +---- TSG Entry %d ----+ | ||
97 | | Scale: %d | | ||
98 | | Timeout: %d | | ||
99 | +----------------------+ | ||
100 | |||
101 | |||
102 | |||
103 | |||
104 | |||
105 | |||
106 | */ | ||
107 | |||
108 | #define PRE KERN_INFO "[nvdebug] " | ||
109 | |||
110 | static void nvdebug_print_tsg(struct entry_tsg* tsg) { | ||
111 | if (tsg->entry_type != ENTRY_TYPE_TSG) { | ||
112 | printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in nvdebug_print_tsg()!\n"); | ||
113 | return; | ||
114 | } | ||
115 | printk(PRE "+---- TSG Entry %-2d----+", tsg->tsgid); | ||
116 | printk(PRE "| Scale: %-13d|", tsg->timeslice_scale); | ||
117 | printk(PRE "| Timeout: %-11d|", tsg->timeslice_timeout); | ||
118 | printk(PRE "+---------------------+"); | ||
119 | } | ||
120 | |||
121 | static void nvdebug_print_chan(struct runlist_chan* chan) { | ||
122 | char* loc_txt; | ||
123 | u64 inst_ptr; | ||
124 | if (chan->entry_type != ENTRY_TYPE_CHAN) { | ||
125 | printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in nvdebug_print_channel()!\n"); | ||
126 | return; | ||
127 | } | ||
128 | switch (chan->inst_target) { | ||
129 | case TARGET_VID_MEM: | ||
130 | loc_txt = "VID_MEM"; | ||
131 | break; | ||
132 | case TARGET_SYS_MEM_COHERENT: | ||
133 | loc_txt = "SYS_MEM_COHERENT"; | ||
134 | break; | ||
135 | case TARGET_SYS_MEM_NONCOHERENT: | ||
136 | loc_txt = "SYS_MEM_NONCOHERENT"; | ||
137 | break; | ||
138 | default: | ||
139 | printk(KERN_WARNING "[nvdebug] Invalid aperture in runlist channel!\n"); | ||
140 | return; | ||
141 | } | ||
142 | // Reconstruct pointer to channel instance block | ||
143 | inst_ptr = chan->inst_ptr_hi; | ||
144 | inst_ptr <<= 32; | ||
145 | inst_ptr |= chan->inst_ptr_lo << 12; | ||
146 | |||
147 | printk(PRE " +- Channel Entry %-4d-+", chan->chid); | ||
148 | printk(PRE " | Runqueue Selector: %d|", chan->runqueue_selector); | ||
149 | printk(PRE " | Instance PTR: |"); | ||
150 | printk(PRE " | %#018llx |", inst_ptr); | ||
151 | printk(PRE " | %-20s|", loc_txt); | ||
152 | printk(PRE " +---------------------+"); | ||
153 | } | ||
154 | |||
155 | #define for_chan_in_tsg(chan, tsg) \ | ||
156 | for (chan = (struct runlist_chan*)(tsg + 1); \ | ||
157 | (void*)chan < (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length; \ | ||
158 | chan++) | ||
159 | |||
160 | #define next_tsg(tsg) \ | ||
161 | (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length | ||
162 | |||
163 | static void nvdebug_print_runlist(struct entry_tsg* head, runlist_info_t rl_info) { | ||
164 | int rl_idx = 0; | ||
165 | struct runlist_chan* chan; | ||
166 | printk(PRE "tsg->tsg_length: %d\n", head->tsg_length); | ||
167 | printk(PRE "rl_info.len: %d\n", rl_info.len); | ||
168 | while (rl_idx < rl_info.len) { | ||
169 | nvdebug_print_tsg(head); | ||
170 | for_chan_in_tsg(chan, head) { | ||
171 | nvdebug_print_chan(chan); | ||
172 | } | ||
173 | rl_idx += 1 + head->tsg_length; | ||
174 | head = next_tsg(head); | ||
175 | } | ||
176 | } | ||
177 | |||
178 | static int __init nvdebug_init(void) { | ||
179 | struct device *dev = NULL; | ||
180 | struct device *temp_dev; | ||
181 | struct gk20a *g; | ||
182 | struct entry_tsg head; | ||
183 | runlist_base_t rl_base; | ||
184 | runlist_info_t rl_info; | ||
185 | u64 runlist_iova; | ||
186 | // Get the last device that matches our name | ||
187 | while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { | ||
188 | printk(KERN_INFO "Found a matching device\n"); | ||
189 | dev = temp_dev; | ||
190 | } | ||
191 | if (!dev) | ||
192 | return -EIO; | ||
193 | g = get_gk20a(dev); | ||
194 | // This address seems to not be: | ||
195 | // - A GPU address (type is sysmem_coherent) | ||
196 | // - A physical address (dereferencing after ioremap crashes) | ||
197 | // - A kernel virtual address (dereferencing segfaults) | ||
198 | // So maybe it's some sort of custom thing? This is an address that the GPU | ||
199 | // can use, so it would make most sense for it to be a physical address. | ||
200 | // | ||
201 | // BUT, it can't possibly be a physical address, as it would refer to an | ||
202 | // address greater than the maximum one on our system (by a lot!). | ||
203 | // Maybe I'm reading the runlist base wrong? | ||
204 | // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual | ||
205 | // address! So, what's this I/O address space? All I know is that it's what | ||
206 | // nvgpu_mem_get_addr() returns. That function returns the result of either: | ||
207 | // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) | ||
208 | // converts an IPA to a PA? | ||
209 | // - nvgpu_mem_iommu_translate | ||
210 | // | ||
211 | // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which | ||
212 | // returns SYSMEM. | ||
213 | // | ||
214 | // To convert a physical address to a IOMMU address, we add a bit | ||
215 | // | ||
216 | // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working | ||
217 | // before because the GPU had simply gone to sleep and invalidated its | ||
218 | // register state, so nvgpu_readl() was simply returning garbage. | ||
219 | |||
220 | printk(KERN_INFO "[nvdebug] Pulling runlist base address from %x\n", NV_PFIFO_RUNLIST_BASE); | ||
221 | printk(KERN_INFO "[nvdebug] Using struct gk20a* of %px\n", g); | ||
222 | printk(KERN_INFO "[nvdebug] g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n", g->name, g->power_on, g->sw_ready, g->is_virtual); | ||
223 | struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g); | ||
224 | printk(KERN_INFO "[nvdebug] l->regs %px, l->regs_saved %px\n", l->regs, l->regs_saved); | ||
225 | if (!l->regs) | ||
226 | return -EIO; | ||
227 | rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); | ||
228 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); | ||
229 | runlist_iova = ((u64)rl_base.ptr) << 12; | ||
230 | printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); | ||
231 | // Segfaults | ||
232 | //u32 attempted_read = ioread32(runlist_iova); | ||
233 | //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read); | ||
234 | |||
235 | // Errors out | ||
236 | //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg)); | ||
237 | //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr); | ||
238 | |||
239 | /* Overcomplicated? | ||
240 | struct iommu_domain *domain = iommu_get_domain_for_dev(dev); | ||
241 | if (!domain) { | ||
242 | printk(KERN_INFO "[nvdebug] No IOMMU domain!\n"); | ||
243 | return -EIO; | ||
244 | } | ||
245 | u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova); | ||
246 | printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr); | ||
247 | */ | ||
248 | |||
249 | printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova)); | ||
250 | printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova)); | ||
251 | head = *(struct entry_tsg*)phys_to_virt(runlist_iova); | ||
252 | nvdebug_print_runlist((struct entry_tsg*)phys_to_virt(runlist_iova), rl_info); | ||
253 | //nvdebug_print_tsg(&head); | ||
254 | //nvdebug_print_chan((struct runlist_chan*)(phys_to_virt(runlist_iova) + sizeof(struct entry_tsg))); | ||
255 | //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
256 | //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
257 | //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
258 | //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
259 | //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); | ||
260 | |||
261 | //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL)); | ||
262 | //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL)); | ||
263 | //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes | ||
264 | //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg)); | ||
265 | /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
266 | printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
267 | printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
268 | printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
269 | printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); | ||
270 | */return 0; | ||
271 | } | ||
272 | |||
273 | static void __exit nvdebug_exit(void) { | ||
274 | printk(KERN_INFO "[nvdebug] Exiting...\n"); | ||
275 | } | ||
276 | |||
277 | module_init(nvdebug_init); | ||
278 | module_exit(nvdebug_exit); | ||
@@ -2,6 +2,10 @@ | |||
2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
3 | */ | 3 | */ |
4 | 4 | ||
5 | // TODO(jbakita): Don't depend on these. | ||
6 | #include <nvgpu/gk20a.h> // For struct gk20a | ||
7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux | ||
8 | |||
5 | /* Runlist Channel | 9 | /* Runlist Channel |
6 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue | 10 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue |
7 | of GPU commands. These commands are typically queued from userspace. | 11 | of GPU commands. These commands are typically queued from userspace. |
@@ -125,3 +129,48 @@ typedef union { | |||
125 | } __attribute__((packed)); | 129 | } __attribute__((packed)); |
126 | uint32_t raw; | 130 | uint32_t raw; |
127 | } runlist_info_t; | 131 | } runlist_info_t; |
132 | |||
133 | // TODO(jbakita): Maybe put the above GPU types in a different file. | ||
134 | |||
/* Iterate `chan` over the channel entries that directly follow TSG header
 * `tsg` in a runlist. Channel entries are laid out back-to-back immediately
 * after the TSG entry, and there are tsg->tsg_length of them.
 * NOTE: `tsg` is evaluated more than once; pass a side-effect-free expression.
 */
#define for_chan_in_tsg(chan, tsg) \
	for ((chan) = (struct runlist_chan *)((tsg) + 1); \
	     (chan) < (struct runlist_chan *)((tsg) + 1) + (tsg)->tsg_length; \
	     (chan)++)

/* Address of the entry following `tsg` and all of its channels (as a void*,
 * so it can be assigned to a struct entry_tsg* without a cast).
 * NOTE: `tsg` is evaluated more than once.
 */
#define next_tsg(tsg) \
	((void *)((struct runlist_chan *)((tsg) + 1) + (tsg)->tsg_length))
142 | |||
143 | struct runlist_iter { | ||
144 | struct entry_tsg *curr_tsg; | ||
145 | runlist_info_t rl_info; | ||
146 | }; | ||
147 | |||
148 | // Defined in runlist.c | ||
149 | int get_runlist_iter(struct runlist_iter *rl_iter); | ||
150 | |||
// Return the struct gk20a* reachable through the driver data of `dev`.
static inline struct gk20a *get_gk20a(struct device *dev) {
	// XXX: Only works because gk20a* is the first member of gk20a_platform,
	// so the driver data can be reinterpreted as a pointer to that member.
	struct gk20a **first_member = dev_get_drvdata(dev);

	return *first_member;
}
155 | |||
156 | // Functionally identical to nvgpu_readl() | ||
157 | // (except we don't try to resolve situations where regs is NULL) | ||
158 | static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { | ||
159 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | ||
160 | if (unlikely(!g_os->regs)) { | ||
161 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | ||
162 | return -1; | ||
163 | } | ||
164 | return readl(g_os->regs + r); | ||
165 | } | ||
166 | |||
167 | // Functionally identical to nvgpu_writel() | ||
168 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { | ||
169 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | ||
170 | if (unlikely(!g_os->regs)) { | ||
171 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); | ||
172 | return; | ||
173 | } | ||
174 | writel_relaxed(v, g_os->regs + r); | ||
175 | wmb(); | ||
176 | } | ||
diff --git a/nvdebug_entry.c b/nvdebug_entry.c new file mode 100644 index 0000000..148bd3f --- /dev/null +++ b/nvdebug_entry.c | |||
@@ -0,0 +1,40 @@ | |||
1 | /* Copyright 2021 Joshua Bakita | ||
2 | * SPDX-License-Identifier: MIT | ||
3 | */ | ||
4 | |||
5 | /* TODO | ||
6 | * - Add sysfs trigger for a preemption | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/proc_fs.h> // So we can set up entries in /proc | ||
12 | |||
13 | #include "nvdebug.h" | ||
14 | |||
15 | // LIAR. But without this we can't use GPL-only exported symbols like | ||
16 | // platform_bus_type or bus_find_device_by_name... | ||
17 | MODULE_LICENSE("GPL"); | ||
18 | MODULE_AUTHOR("Joshua Bakita"); | ||
19 | MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | ||
20 | MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now | ||
21 | |||
22 | extern const struct file_operations runlist_file_ops; | ||
23 | |||
24 | int __init nvdebug_init(void) { | ||
25 | struct proc_dir_entry *entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); | ||
26 | if (!entry) { | ||
27 | remove_proc_entry("runlist", NULL); | ||
28 | printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); | ||
29 | return -ENOMEM; | ||
30 | } | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | static void __exit nvdebug_exit(void) { | ||
35 | remove_proc_entry("runlist", NULL); | ||
36 | printk(KERN_INFO "[nvdebug] Exiting...\n"); | ||
37 | } | ||
38 | |||
39 | module_init(nvdebug_init); | ||
40 | module_exit(nvdebug_exit); | ||
diff --git a/runlist.c b/runlist.c new file mode 100644 index 0000000..8691b51 --- /dev/null +++ b/runlist.c | |||
@@ -0,0 +1,106 @@ | |||
1 | #include <linux/device.h> // For struct device, bus_find_device*() | ||
2 | //#include <linux/iommu.h> // For struct iommu_domain | ||
3 | #include <linux/kernel.h> // Kernel types | ||
4 | #include <asm/io.h> | ||
5 | |||
6 | #include "nvdebug.h" | ||
7 | |||
8 | // Bus types are global symbols in the kernel | ||
9 | extern struct bus_type platform_bus_type; | ||
10 | |||
11 | int get_runlist_iter(struct runlist_iter *rl_iter) { | ||
12 | struct device *dev = NULL; | ||
13 | struct device *temp_dev; | ||
14 | struct gk20a *g; | ||
15 | struct entry_tsg head; | ||
16 | runlist_base_t rl_base; | ||
17 | runlist_info_t rl_info; | ||
18 | u64 runlist_iova; | ||
19 | // Get the last device that matches our name | ||
20 | while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { | ||
21 | dev = temp_dev; | ||
22 | printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev)); | ||
23 | } | ||
24 | if (!dev) | ||
25 | return -EIO; | ||
26 | g = get_gk20a(dev); | ||
27 | // This address seems to not be: | ||
28 | // - A GPU address (type is sysmem_coherent) | ||
29 | // - A physical address (dereferencing after ioremap crashes) | ||
30 | // - A kernel virtual address (dereferencing segfaults) | ||
31 | // So maybe it's some sort of custom thing? This is an address that the GPU | ||
32 | // can use, so it would make most sense for it to be a physical address. | ||
33 | // | ||
34 | // BUT, it can't possibly be a physical address, as it would refer to an | ||
35 | // address greater than the maximum one on our system (by a lot!). | ||
36 | // Maybe I'm reading the runlist base wrong? | ||
37 | // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual | ||
38 | // address! So, what's this I/O address space? All I know is that it's what | ||
39 | // nvgpu_mem_get_addr() returns. That function returns the result of either: | ||
40 | // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) | ||
41 | // converts an IPA to a PA? | ||
42 | // - nvgpu_mem_iommu_translate | ||
43 | // | ||
44 | // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which | ||
45 | // returns SYSMEM. | ||
46 | // | ||
47 | // To convert a physical address to a IOMMU address, we add a bit | ||
48 | // | ||
49 | // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working | ||
50 | // before because the GPU had simply gone to sleep and invalidated its | ||
51 | // register state, so nvgpu_readl() was simply returning garbage. | ||
52 | |||
53 | printk(KERN_INFO "[nvdebug] Pulling runlist base address from %x\n", NV_PFIFO_RUNLIST_BASE); | ||
54 | printk(KERN_INFO "[nvdebug] Using struct gk20a* of %px\n", g); | ||
55 | printk(KERN_INFO "[nvdebug] g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n", | ||
56 | g->name, g->power_on, g->sw_ready, g->is_virtual); | ||
57 | struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g); | ||
58 | printk(KERN_INFO "[nvdebug] l->regs %px, l->regs_saved %px\n", l->regs, l->regs_saved); | ||
59 | if (!l->regs) | ||
60 | return -EIO; | ||
61 | rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); | ||
62 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); | ||
63 | runlist_iova = ((u64)rl_base.ptr) << 12; | ||
64 | printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", | ||
65 | rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); | ||
66 | // Segfaults | ||
67 | //u32 attempted_read = ioread32(runlist_iova); | ||
68 | //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read); | ||
69 | |||
70 | // Errors out | ||
71 | //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg)); | ||
72 | //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr); | ||
73 | |||
74 | /* Overcomplicated? | ||
75 | struct iommu_domain *domain = iommu_get_domain_for_dev(dev); | ||
76 | if (!domain) { | ||
77 | printk(KERN_INFO "[nvdebug] No IOMMU domain!\n"); | ||
78 | return -EIO; | ||
79 | } | ||
80 | u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova); | ||
81 | printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr); | ||
82 | */ | ||
83 | |||
84 | printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova)); | ||
85 | printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova)); | ||
86 | head = *(struct entry_tsg*)phys_to_virt(runlist_iova); | ||
87 | |||
88 | rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova); | ||
89 | rl_iter->rl_info = rl_info; | ||
90 | return 0; | ||
91 | //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
92 | //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
93 | //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
94 | //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
95 | //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); | ||
96 | |||
97 | //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL)); | ||
98 | //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL)); | ||
99 | //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes | ||
100 | //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg)); | ||
101 | /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
102 | printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
103 | printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
104 | printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
105 | printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */ | ||
106 | } | ||
diff --git a/runlist_procfs.c b/runlist_procfs.c new file mode 100644 index 0000000..2107bd4 --- /dev/null +++ b/runlist_procfs.c | |||
@@ -0,0 +1,102 @@ | |||
1 | #include <linux/seq_file.h> // For seq_* functions and types | ||
2 | |||
3 | #include "nvdebug.h" | ||
4 | |||
5 | #define RUNLIST_PROCFS_NAME "runlist" | ||
6 | |||
7 | static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) { | ||
8 | static struct runlist_iter rl_iter; | ||
9 | |||
10 | // *pos == 0 for first call after read of file | ||
11 | if (*pos == 0) { | ||
12 | int err = get_runlist_iter(&rl_iter); | ||
13 | if (err) | ||
14 | return NULL; | ||
15 | return &rl_iter; | ||
16 | } | ||
17 | // When called with *pos != 0, we already traversed the runlist | ||
18 | return NULL; | ||
19 | } | ||
20 | |||
21 | static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter, | ||
22 | loff_t *pos) { | ||
23 | struct runlist_iter* rl_iter = raw_rl_iter; | ||
24 | // Advance by one TSG + channels under last TSG | ||
25 | *pos += 1 + rl_iter->curr_tsg->tsg_length; | ||
26 | // Verify we haven't reached the end of the runlist | ||
27 | // rl_info.len is the num of tsg entries + total num of channel entries | ||
28 | if (*pos < rl_iter->rl_info.len) { | ||
29 | rl_iter->curr_tsg = next_tsg(rl_iter->curr_tsg); | ||
30 | return rl_iter; | ||
31 | } | ||
32 | return NULL; | ||
33 | } | ||
34 | |||
/* seq_file .stop callback. Intentionally empty: .start acquires nothing that
 * has to be released here.
 */
static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter)
{
}
38 | |||
39 | static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { | ||
40 | struct entry_tsg* tsg = ((struct runlist_iter*)raw_rl_iter)->curr_tsg; | ||
41 | struct runlist_chan* chan; | ||
42 | if (tsg->entry_type != ENTRY_TYPE_TSG) { | ||
43 | printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in nvdebug_print_tsg()!\n"); | ||
44 | return -EIO; | ||
45 | } | ||
46 | seq_printf(s, "+---- TSG Entry %-2d----+\n", tsg->tsgid); | ||
47 | seq_printf(s, "| Scale: %-13d|\n", tsg->timeslice_scale); | ||
48 | seq_printf(s, "| Timeout: %-11d|\n", tsg->timeslice_timeout); | ||
49 | seq_printf(s, "+---------------------+\n"); | ||
50 | for_chan_in_tsg(chan, tsg) { | ||
51 | char* loc_txt; | ||
52 | u64 instance_ptr; | ||
53 | if (chan->entry_type != ENTRY_TYPE_CHAN) { | ||
54 | printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in nvdebug_print_channel()!\n"); | ||
55 | return -EIO; | ||
56 | } | ||
57 | switch (chan->inst_target) { | ||
58 | case TARGET_VID_MEM: | ||
59 | loc_txt = "VID_MEM"; | ||
60 | break; | ||
61 | case TARGET_SYS_MEM_COHERENT: | ||
62 | loc_txt = "SYS_MEM_COHERENT"; | ||
63 | break; | ||
64 | case TARGET_SYS_MEM_NONCOHERENT: | ||
65 | loc_txt = "SYS_MEM_NONCOHERENT"; | ||
66 | break; | ||
67 | default: | ||
68 | printk(KERN_WARNING "[nvdebug] Invalid aperture in runlist channel!\n"); | ||
69 | return -EIO; | ||
70 | } | ||
71 | // Reconstruct pointer to channel instance block | ||
72 | instance_ptr = chan->inst_ptr_hi; | ||
73 | instance_ptr <<= 32; | ||
74 | instance_ptr |= chan->inst_ptr_lo << 12; | ||
75 | |||
76 | seq_printf(s, " +- Channel Entry %-4d-+\n", chan->chid); | ||
77 | seq_printf(s, " | Runqueue Selector: %d|\n", chan->runqueue_selector); | ||
78 | seq_printf(s, " | Instance PTR: |\n"); | ||
79 | seq_printf(s, " | %#018llx |\n", instance_ptr); | ||
80 | seq_printf(s, " | %-20s|\n", loc_txt); | ||
81 | seq_printf(s, " +---------------------+\n"); | ||
82 | } | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static const struct seq_operations runlist_file_seq_ops = { | ||
87 | .start = runlist_file_seq_start, | ||
88 | .next = runlist_file_seq_next, | ||
89 | .stop = runlist_file_seq_stop, | ||
90 | .show = runlist_file_seq_show, | ||
91 | }; | ||
92 | |||
93 | static int runlist_file_open(struct inode *inode, struct file *f) { | ||
94 | return seq_open(f, &runlist_file_seq_ops); | ||
95 | } | ||
96 | |||
97 | const struct file_operations runlist_file_ops = { | ||
98 | .open = runlist_file_open, | ||
99 | .read = seq_read, | ||
100 | .llseek = seq_lseek, | ||
101 | .release = seq_release, | ||
102 | }; | ||