diff options
Diffstat (limited to 'runlist.c')
-rw-r--r-- | runlist.c | 221 |
1 files changed, 113 insertions, 108 deletions
@@ -1,122 +1,127 @@ | |||
1 | #include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type | ||
2 | //#include <linux/iommu.h> // For struct iommu_domain | ||
3 | #include <linux/kernel.h> // Kernel types | 1 | #include <linux/kernel.h> // Kernel types |
4 | #include <asm/io.h> | ||
5 | 2 | ||
6 | #include "nvdebug.h" | 3 | #include "nvdebug.h" |
7 | 4 | ||
8 | // Bus types are global symbols in the kernel | ||
9 | extern struct bus_type platform_bus_type; | ||
10 | |||
11 | struct gk20a* get_live_gk20a(void) { | ||
12 | struct device *dev = NULL; | ||
13 | struct device *temp_dev; | ||
14 | struct gk20a *g; | ||
15 | struct nvgpu_os_linux *l; | ||
16 | // Get the last device that matches our name | ||
17 | while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { | ||
18 | dev = temp_dev; | ||
19 | printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev)); | ||
20 | } | ||
21 | if (!dev) | ||
22 | return NULL; | ||
23 | g = get_gk20a(dev); | ||
24 | // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be: | ||
25 | // - A GPU address (type is sysmem_coherent) | ||
26 | // - A physical address (dereferencing after ioremap crashes) | ||
27 | // - A kernel virtual address (dereferencing segfaults) | ||
28 | // So maybe it's some sort of custom thing? This is an address that the GPU | ||
29 | // can use, so it would make most sense for it to be a physical address. | ||
30 | // | ||
31 | // BUT, it can't possibly be a physical address, as it would refer to an | ||
32 | // address greater than the maximum one on our system (by a lot!). | ||
33 | // Maybe I'm reading the runlist base wrong? | ||
34 | // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual | ||
35 | // address! So, what's this I/O address space? All I know is that it's what | ||
36 | // nvgpu_mem_get_addr() returns. That function returns the result of either: | ||
37 | // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) | ||
38 | // converts an IPA to a PA? | ||
39 | // - nvgpu_mem_iommu_translate | ||
40 | // | ||
41 | // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which | ||
42 | // returns SYSMEM. | ||
43 | // | ||
44 | // To convert a physical address to an IOMMU address, we add a bit | |
45 | // | ||
46 | // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working | ||
47 | // before because the GPU had simply gone to sleep and invalidated its | ||
48 | // register state, so nvgpu_readl() was simply returning garbage. | ||
49 | l = container_of(g, struct nvgpu_os_linux, g); | ||
50 | if (!l->regs) | ||
51 | return NULL; | ||
52 | return g; | ||
53 | } | ||
54 | |||
55 | /* Get runlist head and info (incl. length) | 5 | /* Get runlist head and info (incl. length) |
56 | @param rl_iter Location at which to store output | 6 | @param rl_iter Location at which to store output |
7 | @param rl_id Which runlist to obtain? | ||
57 | */ | 8 | */ |
58 | int get_runlist_iter(struct runlist_iter *rl_iter) { | 9 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) { |
59 | struct entry_tsg head; | 10 | runlist_base_t rl_base; |
60 | runlist_base_t rl_base; | 11 | runlist_info_t rl_info; |
61 | runlist_info_t rl_info; | 12 | u64 runlist_iova; |
62 | u64 runlist_iova; | 13 | *rl_iter = (struct runlist_iter){0}; |
63 | struct gk20a *g = get_live_gk20a(); | 14 | rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id)); |
64 | if (!g) | 15 | // Check that reads are working |
16 | if (rl_base.raw == -1) | ||
65 | return -EIO; | 17 | return -EIO; |
66 | rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); | 18 | // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be: |
67 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); | 19 | // - A GPU address (type is sysmem_coherent) |
68 | runlist_iova = ((u64)rl_base.ptr) << 12; | 20 | // - A physical address (dereferencing after ioremap crashes) |
69 | printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", | 21 | // - A kernel virtual address (dereferencing segfaults) |
70 | rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); | 22 | // So maybe it's some sort of custom thing? This is an address that the GPU |
71 | // TODO: Support reading video memory | 23 | // can use, so it would make most sense for it to be a physical address. |
72 | if (rl_base.type == TARGET_VID_MEM) { | 24 | // |
73 | printk(KERN_ERR "[nvdebug] Runlist is located in video memory. Access to video memory is unimplemented."); | 25 | // BUT, it can't possibly be a physical address, as it would refer to an |
74 | return -ENOTSUPP; | 26 | // address greater than the maximum one on our system (by a lot!). |
27 | // Maybe I'm reading the runlist base wrong? | ||
28 | // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual | ||
29 | // address! So, what's this I/O address space? All I know is that it's what | ||
30 | // nvgpu_mem_get_addr() returns. That function returns the result of either: | ||
31 | // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) | ||
32 | // converts an IPA to a PA? | ||
33 | // - nvgpu_mem_iommu_translate | ||
34 | // | ||
35 | // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which | ||
36 | // returns SYSMEM. | ||
37 | // | ||
38 | // To convert a physical address to an IOMMU address, we add a bit | |
39 | // | ||
40 | // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working | ||
41 | // before because the GPU had simply gone to sleep and invalidated its | ||
42 | // register state, so nvgpu_readl() was simply returning garbage. | ||
43 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id)); | ||
44 | runlist_iova = ((u64)rl_base.ptr) << 12; | ||
45 | printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n", | ||
46 | rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw); | ||
47 | printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id); | ||
48 | // Return early on an empty runlist | ||
49 | if (!rl_info.len) | ||
50 | return 0; | ||
51 | // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping | ||
52 | if (rl_base.target == TARGET_VID_MEM) { | ||
53 | printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental."); | ||
54 | bar_config_block_t bar1_block, bar2_block; | ||
55 | bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK); | ||
56 | printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n", ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target), bar1_block.is_virtual ? "virtual" : "physical"); | ||
57 | bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK); | ||
58 | printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar1_block.is_virtual ? "virtual" : "physical"); | ||
59 | uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12); | ||
60 | if (!bar_inst_pramin_offset) { | ||
61 | printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n"); | ||
62 | return -EOPNOTSUPP; | ||
63 | } | ||
64 | /* TODO: Support BAR1? | ||
65 | bar_inst_pramin_offset = vram2PRAMIN(g, bar1_block.ptr << 12); | ||
66 | if (!bar_inst_pramin_offset) { | ||
67 | printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR1 in the current NV_PRAMIN window. VRAM inaccessible.\n"); | ||
68 | return -EOPNOTSUPP; | ||
69 | }*/ | ||
70 | // Instance blocks (size == 1kb) contain many things, but we only care about | ||
71 | // the section which describes the location of the page directory (page table) | ||
72 | uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF; | ||
73 | page_dir_config_t pd_config; | ||
74 | pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN); | ||
75 | uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi; | ||
76 | bar_pdb_vram_addr <<= 20; | ||
77 | bar_pdb_vram_addr |= pd_config.page_dir_lo; | ||
78 | bar_pdb_vram_addr <<= 12; | ||
79 | printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n", bar_pdb_vram_addr, target_to_text(pd_config.target), pd_config.is_ver2 ? "2" : "1", pd_config.raw); | ||
80 | // TODO: SYSMEM support for page table location | ||
81 | if (pd_config.target != TARGET_VID_MEM) { | ||
82 | printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n"); | ||
83 | return -EOPNOTSUPP; | ||
84 | } | ||
85 | uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr); | ||
86 | if (!bar_pdb_pramin_offset) { | ||
87 | printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n"); | ||
88 | return -EOPNOTSUPP; | ||
89 | } | ||
90 | uint64_t runlist_bar_vaddr; | ||
91 | if (pd_config.is_ver2) | ||
92 | runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova); | ||
93 | else | ||
94 | runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova); | ||
95 | if (!runlist_bar_vaddr) { | ||
96 | printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n"); | ||
97 | return -EOPNOTSUPP; | ||
98 | } | ||
99 | printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr); | ||
100 | /* XXX: Old test code | ||
101 | uint32_t bar2_pd_pramin_offset = vram_to_pramin_off(bar2_pd); | ||
102 | //walk_pd_subtree(bar2_pd_pramin_offset); | ||
103 | uint64_t runlist_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, runlist_iova); | ||
104 | page_dir_entry_t pde_0; | ||
105 | pde_0.raw = nvdebug_readl(g, NV_PRAMIN + bar2_pd_pramin_offset); | ||
106 | uint32_t pde_1 = nvdebug_readl(g, NV_PRAMIN + vram_to_pramin_off(((u64)pde_0.addr) << 12)); | ||
107 | uint64_t pde_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, ((u64)pde_0.addr) << 12); | ||
108 | uint32_t pde_2 = readl(g->bar3 + pde_bar2_vaddr); | ||
109 | printk(KERN_INFO "[nvdebug] PDE0 via PRAMIN: %x, via BAR3: %x\n", pde_1, pde_2); | ||
110 | */ | ||
111 | if (!g->bar3) { | ||
112 | printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n"); | ||
113 | return -ENODEV; | ||
114 | } | ||
115 | rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr; | ||
116 | } else { | ||
117 | // Directly access the runlist if stored in SYS_MEM (physically addressed) | ||
118 | rl_iter->curr_entry = phys_to_virt(runlist_iova); | ||
75 | } | 119 | } |
76 | // Segfaults | 120 | rl_iter->rl_info = rl_info; |
77 | //u32 attempted_read = ioread32(runlist_iova); | 121 | return 0; |
78 | //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read); | ||
79 | |||
80 | // Errors out | ||
81 | //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg)); | ||
82 | //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr); | ||
83 | |||
84 | /* Overcomplicated? | ||
85 | struct iommu_domain *domain = iommu_get_domain_for_dev(dev); | ||
86 | if (!domain) { | ||
87 | printk(KERN_INFO "[nvdebug] No IOMMU domain!\n"); | ||
88 | return -EIO; | ||
89 | } | ||
90 | u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova); | ||
91 | printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr); | ||
92 | */ | ||
93 | |||
94 | printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova)); | ||
95 | printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova)); | ||
96 | head = *(struct entry_tsg*)phys_to_virt(runlist_iova); | ||
97 | |||
98 | rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova); | ||
99 | rl_iter->rl_info = rl_info; | ||
100 | return 0; | ||
101 | //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
102 | //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
103 | //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
104 | //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
105 | //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); | ||
106 | |||
107 | //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL)); | ||
108 | //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL)); | ||
109 | //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes | ||
110 | //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg)); | ||
111 | /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
112 | printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
113 | printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
114 | printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
115 | printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */ | ||
116 | } | 122 | } |
117 | 123 | ||
118 | int preempt_tsg(uint32_t tsg_id) { | 124 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) { |
119 | struct gk20a *g = get_live_gk20a(); | ||
120 | runlist_info_t rl_info; | 125 | runlist_info_t rl_info; |
121 | pfifo_preempt_t pfifo_preempt; | 126 | pfifo_preempt_t pfifo_preempt; |
122 | runlist_disable_t rl_disable; | 127 | runlist_disable_t rl_disable; |