aboutsummaryrefslogtreecommitdiffstats
path: root/runlist.c
diff options
context:
space:
mode:
Diffstat (limited to 'runlist.c')
-rw-r--r-- runlist.c 221
1 files changed, 113 insertions, 108 deletions
diff --git a/runlist.c b/runlist.c
index c8ff99f..94be18e 100644
--- a/runlist.c
+++ b/runlist.c
@@ -1,122 +1,127 @@
1#include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type
2//#include <linux/iommu.h> // For struct iommu_domain
3#include <linux/kernel.h> // Kernel types 1#include <linux/kernel.h> // Kernel types
4#include <asm/io.h>
5 2
6#include "nvdebug.h" 3#include "nvdebug.h"
7 4
8// Bus types are global symbols in the kernel
9extern struct bus_type platform_bus_type;
10
11struct gk20a* get_live_gk20a(void) {
12 struct device *dev = NULL;
13 struct device *temp_dev;
14 struct gk20a *g;
15 struct nvgpu_os_linux *l;
16 // Get the last device that matches our name
17 while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) {
18 dev = temp_dev;
19 printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev));
20 }
21 if (!dev)
22 return NULL;
23 g = get_gk20a(dev);
24 // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be:
25 // - A GPU address (type is sysmem_coherent)
26 // - A physical address (dereferencing after ioremap crashes)
27 // - A kernel virtual address (dereferencing segfaults)
28 // So maybe it's some sort of custom thing? This is an address that the GPU
29 // can use, so it would make most sense for it to be a physical address.
30 //
31 // BUT, it can't possibly be a physical address, as it would refer to an
32 // address greater than the maximum one on our system (by a lot!).
33 // Maybe I'm reading the runlist base wrong?
34 // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
35 // address! So, what's this I/O address space? All I know is that it's what
36 // nvgpu_mem_get_addr() returns. That function returns the result of either:
37 // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?)
38 // converts an IPA to a PA?
39 // - nvgpu_mem_iommu_translate
40 //
41 // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
42 // returns SYSMEM.
43 //
44 // To convert a physical address to an IOMMU address, we add a bit
45 //
46 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
47 // before because the GPU had simply gone to sleep and invalidated its
48 // register state, so nvgpu_readl() was simply returning garbage.
49 l = container_of(g, struct nvgpu_os_linux, g);
50 if (!l->regs)
51 return NULL;
52 return g;
53}
54
55/* Get runlist head and info (incl. length) 5/* Get runlist head and info (incl. length)
56 @param rl_iter Location at which to store output 6 @param rl_iter Location at which to store output
7 @param rl_id Which runlist to obtain?
57*/ 8*/
58int get_runlist_iter(struct runlist_iter *rl_iter) { 9int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) {
59 struct entry_tsg head; 10 runlist_base_t rl_base;
60 runlist_base_t rl_base; 11 runlist_info_t rl_info;
61 runlist_info_t rl_info; 12 u64 runlist_iova;
62 u64 runlist_iova; 13 *rl_iter = (struct runlist_iter){0};
63 struct gk20a *g = get_live_gk20a(); 14 rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id));
64 if (!g) 15 // Check that reads are working
16 if (rl_base.raw == -1)
65 return -EIO; 17 return -EIO;
66 rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); 18 // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be:
67 rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); 19 // - A GPU address (type is sysmem_coherent)
68 runlist_iova = ((u64)rl_base.ptr) << 12; 20 // - A physical address (dereferencing after ioremap crashes)
69 printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", 21 // - A kernel virtual address (dereferencing segfaults)
70 rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); 22 // So maybe it's some sort of custom thing? This is an address that the GPU
71 // TODO: Support reading video memory 23 // can use, so it would make most sense for it to be a physical address.
72 if (rl_base.type == TARGET_VID_MEM) { 24 //
73 printk(KERN_ERR "[nvdebug] Runlist is located in video memory. Access to video memory is unimplemented."); 25 // BUT, it can't possibly be a physical address, as it would refer to an
74 return -ENOTSUPP; 26 // address greater than the maximum one on our system (by a lot!).
27 // Maybe I'm reading the runlist base wrong?
28 // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
29 // address! So, what's this I/O address space? All I know is that it's what
30 // nvgpu_mem_get_addr() returns. That function returns the result of either:
31 // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?)
32 // converts an IPA to a PA?
33 // - nvgpu_mem_iommu_translate
34 //
35 // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
36 // returns SYSMEM.
37 //
38 // To convert a physical address to an IOMMU address, we add a bit
39 //
40 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
41 // before because the GPU had simply gone to sleep and invalidated its
42 // register state, so nvgpu_readl() was simply returning garbage.
43 rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id));
44 runlist_iova = ((u64)rl_base.ptr) << 12;
45 printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n",
46 rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw);
47 printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id);
48 // Return early on an empty runlist
49 if (!rl_info.len)
50 return 0;
51 // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping
52 if (rl_base.target == TARGET_VID_MEM) {
53 printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental.");
54 bar_config_block_t bar1_block, bar2_block;
55 bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK);
56 printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n", ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target), bar1_block.is_virtual ? "virtual" : "physical");
57 bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK);
58 printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar1_block.is_virtual ? "virtual" : "physical");
59 uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12);
60 if (!bar_inst_pramin_offset) {
61 printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
62 return -EOPNOTSUPP;
63 }
64 /* TODO: Support BAR1?
65 bar_inst_pramin_offset = vram2PRAMIN(g, bar1_block.ptr << 12);
66 if (!bar_inst_pramin_offset) {
67 printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR1 in the current NV_PRAMIN window. VRAM inaccessible.\n");
68 return -EOPNOTSUPP;
69 }*/
70 // Instance blocks (size == 1kb) contain many things, but we only care about
71 // the section which describes the location of the page directory (page table)
72 uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF;
73 page_dir_config_t pd_config;
74 pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN);
75 uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi;
76 bar_pdb_vram_addr <<= 20;
77 bar_pdb_vram_addr |= pd_config.page_dir_lo;
78 bar_pdb_vram_addr <<= 12;
79 printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n", bar_pdb_vram_addr, target_to_text(pd_config.target), pd_config.is_ver2 ? "2" : "1", pd_config.raw);
80 // TODO: SYSMEM support for page table location
81 if (pd_config.target != TARGET_VID_MEM) {
82 printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n");
83 return -EOPNOTSUPP;
84 }
85 uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr);
86 if (!bar_pdb_pramin_offset) {
87 printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
88 return -EOPNOTSUPP;
89 }
90 uint64_t runlist_bar_vaddr;
91 if (pd_config.is_ver2)
92 runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
93 else
94 runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
95 if (!runlist_bar_vaddr) {
96 printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n");
97 return -EOPNOTSUPP;
98 }
99 printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr);
100 /* XXX: Old test code
101 uint32_t bar2_pd_pramin_offset = vram_to_pramin_off(bar2_pd);
102 //walk_pd_subtree(bar2_pd_pramin_offset);
103 uint64_t runlist_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, runlist_iova);
104 page_dir_entry_t pde_0;
105 pde_0.raw = nvdebug_readl(g, NV_PRAMIN + bar2_pd_pramin_offset);
106 uint32_t pde_1 = nvdebug_readl(g, NV_PRAMIN + vram_to_pramin_off(((u64)pde_0.addr) << 12));
107 uint64_t pde_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, ((u64)pde_0.addr) << 12);
108 uint32_t pde_2 = readl(g->bar3 + pde_bar2_vaddr);
109 printk(KERN_INFO "[nvdebug] PDE0 via PRAMIN: %x, via BAR3: %x\n", pde_1, pde_2);
110 */
111 if (!g->bar3) {
112 printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n");
113 return -ENODEV;
114 }
115 rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr;
116 } else {
117 // Directly access the runlist if stored in SYS_MEM (physically addressed)
118 rl_iter->curr_entry = phys_to_virt(runlist_iova);
75 } 119 }
76 // Segfaults 120 rl_iter->rl_info = rl_info;
77 //u32 attempted_read = ioread32(runlist_iova); 121 return 0;
78 //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read);
79
80 // Errors out
81 //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg));
82 //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr);
83
84 /* Overcomplicated?
85 struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
86 if (!domain) {
87 printk(KERN_INFO "[nvdebug] No IOMMU domain!\n");
88 return -EIO;
89 }
90 u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova);
91 printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr);
92 */
93
94 printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova));
95 printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova));
96 head = *(struct entry_tsg*)phys_to_virt(runlist_iova);
97
98 rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova);
99 rl_iter->rl_info = rl_info;
100 return 0;
101 //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
102 //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
103 //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
104 //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
105 //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid);
106
107 //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL));
108 //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL));
109 //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes
110 //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg));
111 /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
112 printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
113 printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
114 printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
115 printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */
116} 122}
117 123
118int preempt_tsg(uint32_t tsg_id) { 124int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) {
119 struct gk20a *g = get_live_gk20a();
120 runlist_info_t rl_info; 125 runlist_info_t rl_info;
121 pfifo_preempt_t pfifo_preempt; 126 pfifo_preempt_t pfifo_preempt;
122 runlist_disable_t rl_disable; 127 runlist_disable_t rl_disable;